AOMedia AV1 Codec
svc_encoder_rtc
1/*
2 * Copyright (c) 2019, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12// This is an example demonstrating how to implement a multi-layer AOM
13// encoding scheme for RTC video applications.
14
15#include <assert.h>
16#include <limits.h>
17#include <math.h>
18#include <stdio.h>
19#include <stdlib.h>
20#include <string.h>
21
22#include <memory>
23
24#include "config/aom_config.h"
25
26#if CONFIG_AV1_DECODER
27#include "aom/aom_decoder.h"
28#endif
29#include "aom/aom_encoder.h"
30#include "aom/aom_image.h"
31#include "aom/aom_integer.h"
32#include "aom/aomcx.h"
33#include "aom_dsp/bitwriter_buffer.h"
34#include "aom_ports/aom_timer.h"
35#include "av1/ratectrl_rtc.h"
36#include "common/args.h"
37#include "common/tools_common.h"
38#include "common/video_writer.h"
39#include "examples/encoder_util.h"
40#include "examples/multilayer_metadata.h"
41
// Capacity, in bytes, of the AppInput::options string buffer.
#define OPTION_BUFFER_SIZE 1024
// Upper bound on the number of input files (one per spatial layer).
#define MAX_NUM_SPATIAL_LAYERS 4
44
45typedef struct {
46 const char *output_filename;
47 char options[OPTION_BUFFER_SIZE];
48 struct AvxInputContext input_ctx[MAX_NUM_SPATIAL_LAYERS];
49 int speed;
50 int aq_mode;
51 int layering_mode;
52 int output_obu;
53 int decode;
54 int tune_content;
55 int show_psnr;
56 bool use_external_rc;
57 bool scale_factors_explicitly_set;
58 const char *multilayer_metadata_file;
59} AppInput;
60
// Kinds of per-layer options. The values double as indices into the
// option_min_values / option_max_values tables, so the order matters.
typedef enum {
  QUANTIZER = 0,
  BITRATE,
  SCALE_FACTOR,
  AUTO_ALT_REF,
  ALL_OPTION_TYPES  // Number of option types; not a real option.
} LAYER_OPTION_TYPE;
68
69static const arg_def_t outputfile =
70 ARG_DEF("o", "output", 1, "Output filename");
71static const arg_def_t frames_arg =
72 ARG_DEF("f", "frames", 1, "Number of frames to encode");
73static const arg_def_t threads_arg =
74 ARG_DEF("th", "threads", 1, "Number of threads to use");
75static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "Source width");
76static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "Source height");
77static const arg_def_t timebase_arg =
78 ARG_DEF("t", "timebase", 1, "Timebase (num/den)");
79static const arg_def_t bitrate_arg = ARG_DEF(
80 "b", "target-bitrate", 1, "Encoding bitrate, in kilobits per second");
81static const arg_def_t spatial_layers_arg =
82 ARG_DEF("sl", "spatial-layers", 1, "Number of spatial SVC layers");
83static const arg_def_t temporal_layers_arg =
84 ARG_DEF("tl", "temporal-layers", 1, "Number of temporal SVC layers");
85static const arg_def_t layering_mode_arg =
86 ARG_DEF("lm", "layering-mode", 1, "Temporal layering scheme.");
87static const arg_def_t kf_dist_arg =
88 ARG_DEF("k", "kf-dist", 1, "Number of frames between keyframes");
89static const arg_def_t scale_factors_arg =
90 ARG_DEF("r", "scale-factors", 1, "Scale factors (lowest to highest layer)");
91static const arg_def_t min_q_arg =
92 ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
93static const arg_def_t max_q_arg =
94 ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
95static const arg_def_t speed_arg =
96 ARG_DEF("sp", "speed", 1, "Speed configuration");
97static const arg_def_t aqmode_arg =
98 ARG_DEF("aq", "aqmode", 1, "AQ mode off/on");
99static const arg_def_t bitrates_arg =
100 ARG_DEF("bl", "bitrates", 1,
101 "Bitrates[spatial_layer * num_temporal_layer + temporal_layer]");
102static const arg_def_t dropframe_thresh_arg =
103 ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
104static const arg_def_t error_resilient_arg =
105 ARG_DEF(NULL, "error-resilient", 1, "Error resilient flag");
106static const arg_def_t output_obu_arg =
107 ARG_DEF(NULL, "output-obu", 1,
108 "Write OBUs when set to 1. Otherwise write IVF files.");
109static const arg_def_t test_decode_arg =
110 ARG_DEF(NULL, "test-decode", 1,
111 "Attempt to test decoding the output when set to 1. Default is 1.");
112static const arg_def_t psnr_arg =
113 ARG_DEF(NULL, "psnr", -1, "Show PSNR in status line.");
114static const arg_def_t ext_rc_arg =
115 ARG_DEF(NULL, "use-ext-rc", 0, "Use external rate control.");
116static const struct arg_enum_list tune_content_enum[] = {
117 { "default", AOM_CONTENT_DEFAULT },
118 { "screen", AOM_CONTENT_SCREEN },
119 { "film", AOM_CONTENT_FILM },
120 { NULL, 0 }
121};
122static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
123 NULL, "tune-content", 1, "Tune content type", tune_content_enum);
124#if CONFIG_CWG_E050
125static const arg_def_t multilayer_metadata_file_arg =
126 ARG_DEF("ml", "multilayer_metadata_file", 1,
127 "Experimental: path to multilayer metadata file");
128#endif
129
130#if CONFIG_AV1_HIGHBITDEPTH
131static const struct arg_enum_list bitdepth_enum[] = { { "8", AOM_BITS_8 },
132 { "10", AOM_BITS_10 },
133 { NULL, 0 } };
134
135static const arg_def_t bitdepth_arg = ARG_DEF_ENUM(
136 "d", "bit-depth", 1, "Bit depth for codec 8 or 10. ", bitdepth_enum);
137#endif // CONFIG_AV1_HIGHBITDEPTH
138
139static const arg_def_t *svc_args[] = {
140 &frames_arg,
141 &outputfile,
142 &width_arg,
143 &height_arg,
144 &timebase_arg,
145 &bitrate_arg,
146 &spatial_layers_arg,
147 &kf_dist_arg,
148 &scale_factors_arg,
149 &min_q_arg,
150 &max_q_arg,
151 &temporal_layers_arg,
152 &layering_mode_arg,
153 &threads_arg,
154 &aqmode_arg,
155#if CONFIG_AV1_HIGHBITDEPTH
156 &bitdepth_arg,
157#endif
158 &speed_arg,
159 &bitrates_arg,
160 &dropframe_thresh_arg,
161 &error_resilient_arg,
162 &output_obu_arg,
163 &test_decode_arg,
164 &tune_content_arg,
165 &psnr_arg,
166#if CONFIG_CWG_E050
167 &multilayer_metadata_file_arg,
168#endif
169 NULL,
170};
171
// Zero-fills the object |Dest| in place. Pass the object itself, not a
// pointer to it: the macro takes its address and uses sizeof(Dest).
#define zero(Dest) memset(&(Dest), 0, sizeof(Dest))

// Program name printed in the usage message by usage_exit().
static const char *exec_name;
175
176void usage_exit(void) {
177 fprintf(stderr,
178 "Usage: %s <options> input_filename [input_filename ...] -o "
179 "output_filename\n",
180 exec_name);
181 fprintf(stderr, "Options:\n");
182 arg_show_usage(stderr, svc_args);
183 fprintf(
184 stderr,
185 "Input files must be y4m or yuv.\n"
186 "If multiple input files are specified, they correspond to spatial "
187 "layers, and there should be as many as there are spatial layers.\n"
188 "All input files must have the same width, height, frame rate and number "
189 "of frames.\n"
190 "If only one file is specified, it is used for all spatial layers.\n");
191 exit(EXIT_FAILURE);
192}
193
// Returns 1 when the first four bytes in |detect| are the Y4M signature
// "YUV4", and 0 otherwise.
static int file_is_y4m(const char detect[4]) {
  static const char kY4mMagic[4] = { 'Y', 'U', 'V', '4' };
  return memcmp(detect, kY4mMagic, sizeof(kY4mMagic)) == 0;
}
197
// Returns 1 when the first four bytes in |detect| are the IVF signature
// "DKIF", and 0 otherwise.
static int fourcc_is_ivf(const char detect[4]) {
  const int differs = memcmp(detect, "DKIF", 4);
  return differs ? 0 : 1;
}
204
205static const int option_max_values[ALL_OPTION_TYPES] = { 63, INT_MAX, INT_MAX,
206 1 };
207
208static const int option_min_values[ALL_OPTION_TYPES] = { 0, 0, 1, 0 };
209
210static void open_input_file(struct AvxInputContext *input,
212 /* Parse certain options from the input file, if possible */
213 input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb")
214 : set_binary_mode(stdin);
215
216 if (!input->file) fatal("Failed to open input file");
217
218 if (!fseeko(input->file, 0, SEEK_END)) {
219 /* Input file is seekable. Figure out how long it is, so we can get
220 * progress info.
221 */
222 input->length = ftello(input->file);
223 rewind(input->file);
224 }
225
226 /* Default to 1:1 pixel aspect ratio. */
227 input->pixel_aspect_ratio.numerator = 1;
228 input->pixel_aspect_ratio.denominator = 1;
229
230 /* For RAW input sources, these bytes will applied on the first frame
231 * in read_frame().
232 */
233 input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
234 input->detect.position = 0;
235
236 if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
237 if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp,
238 input->only_i420) >= 0) {
239 input->file_type = FILE_TYPE_Y4M;
240 input->width = input->y4m.pic_w;
241 input->height = input->y4m.pic_h;
242 input->pixel_aspect_ratio.numerator = input->y4m.par_n;
243 input->pixel_aspect_ratio.denominator = input->y4m.par_d;
244 input->framerate.numerator = input->y4m.fps_n;
245 input->framerate.denominator = input->y4m.fps_d;
246 input->fmt = input->y4m.aom_fmt;
247 input->bit_depth = static_cast<aom_bit_depth_t>(input->y4m.bit_depth);
248 } else {
249 fatal("Unsupported Y4M stream.");
250 }
251 } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
252 fatal("IVF is not supported as input.");
253 } else {
254 input->file_type = FILE_TYPE_RAW;
255 }
256}
257
258static aom_codec_err_t extract_option(LAYER_OPTION_TYPE type, char *input,
259 int *value0, int *value1) {
260 if (type == SCALE_FACTOR) {
261 *value0 = (int)strtol(input, &input, 10);
262 if (*input++ != '/') return AOM_CODEC_INVALID_PARAM;
263 *value1 = (int)strtol(input, &input, 10);
264
265 if (*value0 < option_min_values[SCALE_FACTOR] ||
266 *value1 < option_min_values[SCALE_FACTOR] ||
267 *value0 > option_max_values[SCALE_FACTOR] ||
268 *value1 > option_max_values[SCALE_FACTOR] ||
269 *value0 > *value1) // num shouldn't be greater than den
271 } else {
272 *value0 = atoi(input);
273 if (*value0 < option_min_values[type] || *value0 > option_max_values[type])
275 }
276 return AOM_CODEC_OK;
277}
278
279static aom_codec_err_t parse_layer_options_from_string(
280 aom_svc_params_t *svc_params, LAYER_OPTION_TYPE type, const char *input,
281 int *option0, int *option1) {
283 char *input_string;
284 char *token;
285 const char *delim = ",";
286 int num_layers = svc_params->number_spatial_layers;
287 int i = 0;
288
289 if (type == BITRATE)
290 num_layers =
291 svc_params->number_spatial_layers * svc_params->number_temporal_layers;
292
293 if (input == NULL || option0 == NULL ||
294 (option1 == NULL && type == SCALE_FACTOR))
296
297 const size_t input_length = strlen(input);
298 input_string = reinterpret_cast<char *>(malloc(input_length + 1));
299 if (input_string == NULL) return AOM_CODEC_MEM_ERROR;
300 memcpy(input_string, input, input_length + 1);
301 token = strtok(input_string, delim); // NOLINT
302 for (i = 0; i < num_layers; ++i) {
303 if (token != NULL) {
304 res = extract_option(type, token, option0 + i, option1 + i);
305 if (res != AOM_CODEC_OK) break;
306 token = strtok(NULL, delim); // NOLINT
307 } else {
309 break;
310 }
311 }
312 free(input_string);
313 return res;
314}
315
316static void parse_command_line(int argc, const char **argv_,
317 AppInput *app_input,
318 aom_svc_params_t *svc_params,
319 aom_codec_enc_cfg_t *enc_cfg) {
320 struct arg arg;
321 char **argv = NULL;
322 char **argi = NULL;
323 char **argj = NULL;
324 char string_options[1024] = { 0 };
325
326 // Default settings
327 svc_params->number_spatial_layers = 1;
328 svc_params->number_temporal_layers = 1;
329 app_input->layering_mode = 0;
330 app_input->output_obu = 0;
331 app_input->decode = 1;
332 enc_cfg->g_threads = 1;
333 enc_cfg->rc_end_usage = AOM_CBR;
334
335 // process command line options
336 argv = argv_dup(argc - 1, argv_ + 1);
337 if (!argv) {
338 fprintf(stderr, "Error allocating argument list\n");
339 exit(EXIT_FAILURE);
340 }
341 for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
342 arg.argv_step = 1;
343
344 if (arg_match(&arg, &outputfile, argi)) {
345 app_input->output_filename = arg.val;
346 } else if (arg_match(&arg, &width_arg, argi)) {
347 enc_cfg->g_w = arg_parse_uint(&arg);
348 } else if (arg_match(&arg, &height_arg, argi)) {
349 enc_cfg->g_h = arg_parse_uint(&arg);
350 } else if (arg_match(&arg, &timebase_arg, argi)) {
351 enc_cfg->g_timebase = arg_parse_rational(&arg);
352 } else if (arg_match(&arg, &bitrate_arg, argi)) {
353 enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
354 } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
355 svc_params->number_spatial_layers = arg_parse_uint(&arg);
356 } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
357 svc_params->number_temporal_layers = arg_parse_uint(&arg);
358 } else if (arg_match(&arg, &speed_arg, argi)) {
359 app_input->speed = arg_parse_uint(&arg);
360 if (app_input->speed > 11) {
361 aom_tools_warn("Mapping speed %d to speed 11.\n", app_input->speed);
362 }
363 } else if (arg_match(&arg, &aqmode_arg, argi)) {
364 app_input->aq_mode = arg_parse_uint(&arg);
365 } else if (arg_match(&arg, &threads_arg, argi)) {
366 enc_cfg->g_threads = arg_parse_uint(&arg);
367 } else if (arg_match(&arg, &layering_mode_arg, argi)) {
368 app_input->layering_mode = arg_parse_int(&arg);
369 } else if (arg_match(&arg, &kf_dist_arg, argi)) {
370 enc_cfg->kf_min_dist = arg_parse_uint(&arg);
371 enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
372 } else if (arg_match(&arg, &scale_factors_arg, argi)) {
373 aom_codec_err_t res = parse_layer_options_from_string(
374 svc_params, SCALE_FACTOR, arg.val, svc_params->scaling_factor_num,
375 svc_params->scaling_factor_den);
376 app_input->scale_factors_explicitly_set = true;
377 if (res != AOM_CODEC_OK) {
378 die("Failed to parse scale factors: %s\n",
380 }
381 } else if (arg_match(&arg, &min_q_arg, argi)) {
382 enc_cfg->rc_min_quantizer = arg_parse_uint(&arg);
383 } else if (arg_match(&arg, &max_q_arg, argi)) {
384 enc_cfg->rc_max_quantizer = arg_parse_uint(&arg);
385#if CONFIG_AV1_HIGHBITDEPTH
386 } else if (arg_match(&arg, &bitdepth_arg, argi)) {
387 enc_cfg->g_bit_depth =
388 static_cast<aom_bit_depth_t>(arg_parse_enum_or_int(&arg));
389 switch (enc_cfg->g_bit_depth) {
390 case AOM_BITS_8:
391 enc_cfg->g_input_bit_depth = 8;
392 enc_cfg->g_profile = 0;
393 break;
394 case AOM_BITS_10:
395 enc_cfg->g_input_bit_depth = 10;
396 enc_cfg->g_profile = 0;
397 break;
398 default:
399 die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
400 }
401#endif // CONFIG_VP9_HIGHBITDEPTH
402 } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) {
403 enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg);
404 } else if (arg_match(&arg, &error_resilient_arg, argi)) {
405 enc_cfg->g_error_resilient = arg_parse_uint(&arg);
406 if (enc_cfg->g_error_resilient != 0 && enc_cfg->g_error_resilient != 1)
407 die("Invalid value for error resilient (0, 1): %d.",
408 enc_cfg->g_error_resilient);
409 } else if (arg_match(&arg, &output_obu_arg, argi)) {
410 app_input->output_obu = arg_parse_uint(&arg);
411 if (app_input->output_obu != 0 && app_input->output_obu != 1)
412 die("Invalid value for obu output flag (0, 1): %d.",
413 app_input->output_obu);
414 } else if (arg_match(&arg, &test_decode_arg, argi)) {
415 app_input->decode = arg_parse_uint(&arg);
416 if (app_input->decode != 0 && app_input->decode != 1)
417 die("Invalid value for test decode flag (0, 1): %d.",
418 app_input->decode);
419 } else if (arg_match(&arg, &tune_content_arg, argi)) {
420 app_input->tune_content = arg_parse_enum_or_int(&arg);
421 printf("tune content %d\n", app_input->tune_content);
422 } else if (arg_match(&arg, &psnr_arg, argi)) {
423 app_input->show_psnr = 1;
424 } else if (arg_match(&arg, &ext_rc_arg, argi)) {
425 app_input->use_external_rc = true;
426#if CONFIG_CWG_E050
427 } else if (arg_match(&arg, &multilayer_metadata_file_arg, argi)) {
428 app_input->multilayer_metadata_file = arg.val;
429#endif
430 } else {
431 ++argj;
432 }
433 }
434
435 // Total bitrate needs to be parsed after the number of layers.
436 for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
437 arg.argv_step = 1;
438 if (arg_match(&arg, &bitrates_arg, argi)) {
439 aom_codec_err_t res = parse_layer_options_from_string(
440 svc_params, BITRATE, arg.val, svc_params->layer_target_bitrate, NULL);
441 if (res != AOM_CODEC_OK) {
442 die("Failed to parse bitrates: %s\n", aom_codec_err_to_string(res));
443 }
444 } else {
445 ++argj;
446 }
447 }
448
449 // There will be a space in front of the string options
450 if (strlen(string_options) > 0)
451 strncpy(app_input->options, string_options, OPTION_BUFFER_SIZE);
452
453 // Check for unrecognized options
454 for (argi = argv; *argi; ++argi)
455 if (argi[0][0] == '-' && strlen(argi[0]) > 1)
456 die("Error: Unrecognized option %s\n", *argi);
457
458 if (argv[0] == NULL) {
459 usage_exit();
460 }
461
462 int input_count = 0;
463 while (argv[input_count] != NULL && input_count < MAX_NUM_SPATIAL_LAYERS) {
464 app_input->input_ctx[input_count].filename = argv[input_count];
465 ++input_count;
466 }
467 if (input_count > 1 && input_count != svc_params->number_spatial_layers) {
468 die("Error: Number of input files does not match number of spatial layers");
469 }
470 if (argv[input_count] != NULL) {
471 die("Error: Too many input files specified, there should be at most %d",
472 MAX_NUM_SPATIAL_LAYERS);
473 }
474
475 free(argv);
476
477 for (int i = 0; i < input_count; ++i) {
478 open_input_file(&app_input->input_ctx[i], AOM_CSP_UNKNOWN);
479 if (app_input->input_ctx[i].file_type == FILE_TYPE_Y4M) {
480 if (enc_cfg->g_w == 0 || enc_cfg->g_h == 0) {
481 // Override these settings with the info from Y4M file.
482 enc_cfg->g_w = app_input->input_ctx[i].width;
483 enc_cfg->g_h = app_input->input_ctx[i].height;
484 // g_timebase is the reciprocal of frame rate.
485 enc_cfg->g_timebase.num = app_input->input_ctx[i].framerate.denominator;
486 enc_cfg->g_timebase.den = app_input->input_ctx[i].framerate.numerator;
487 } else if (enc_cfg->g_w != app_input->input_ctx[i].width ||
488 enc_cfg->g_h != app_input->input_ctx[i].height ||
489 enc_cfg->g_timebase.num !=
490 app_input->input_ctx[i].framerate.denominator ||
491 enc_cfg->g_timebase.den !=
492 app_input->input_ctx[i].framerate.numerator) {
493 die("Error: Input file dimensions and/or frame rate mismatch");
494 }
495 }
496 }
497 if (enc_cfg->g_w == 0 || enc_cfg->g_h == 0) {
498 die("Error: Input file dimensions not set, use -w and -h");
499 }
500
501 if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
502 enc_cfg->g_h % 2)
503 die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
504
505 printf(
506 "Codec %s\n"
507 "layers: %d\n"
508 "width %u, height: %u\n"
509 "num: %d, den: %d, bitrate: %u\n"
510 "gop size: %u\n",
512 svc_params->number_spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
513 enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
514 enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
515}
516
// Number of temporal layers for each of the 12 table entries.
// NOTE(review): presumably indexed by layering mode (0..11) - the lookup is
// not visible in this part of the file; confirm against the caller.
static const int mode_to_num_temporal_layers[12] = {
  1, 2, 3, 3, 2, 1,  // entries 0 - 5
  1, 3, 3, 3, 3, 3,  // entries 6 - 11
};

// Number of spatial layers for each of the 12 table entries; same indexing
// as mode_to_num_temporal_layers.
static const int mode_to_num_spatial_layers[12] = {
  1, 1, 1, 1, 1, 2,  // entries 0 - 5
  3, 2, 3, 3, 3, 3,  // entries 6 - 11
};
523
524// For rate control encoding stats.
525struct RateControlMetrics {
526 // Number of input frames per layer.
527 int layer_input_frames[AOM_MAX_TS_LAYERS];
528 // Number of encoded non-key frames per layer.
529 int layer_enc_frames[AOM_MAX_TS_LAYERS];
530 // Framerate per layer layer (cumulative).
531 double layer_framerate[AOM_MAX_TS_LAYERS];
532 // Target average frame size per layer (per-frame-bandwidth per layer).
533 double layer_pfb[AOM_MAX_LAYERS];
534 // Actual average frame size per layer.
535 double layer_avg_frame_size[AOM_MAX_LAYERS];
536 // Average rate mismatch per layer (|target - actual| / target).
537 double layer_avg_rate_mismatch[AOM_MAX_LAYERS];
538 // Actual encoding bitrate per layer (cumulative across temporal layers).
539 double layer_encoding_bitrate[AOM_MAX_LAYERS];
540 // Average of the short-time encoder actual bitrate.
541 // TODO(marpan): Should we add these short-time stats for each layer?
542 double avg_st_encoding_bitrate;
543 // Variance of the short-time encoder actual bitrate.
544 double variance_st_encoding_bitrate;
545 // Window (number of frames) for computing short-timee encoding bitrate.
546 int window_size;
547 // Number of window measurements.
548 int window_count;
549 int layer_target_bitrate[AOM_MAX_LAYERS];
550};
551
// Number of entries in the reference-frame refresh[] array.
static const int REF_FRAMES = 8;

// Number of inter references per frame; size of the ref_idx[] and
// reference[] arrays.
static const int INTER_REFS_PER_FRAME = 7;

// Names for the seven references used in this example encoder. The values
// are indices into the ref_idx[] / reference[] arrays.
enum {
  SVC_LAST_FRAME = 0,
  SVC_LAST2_FRAME,    // 1
  SVC_LAST3_FRAME,    // 2
  SVC_GOLDEN_FRAME,   // 3
  SVC_BWDREF_FRAME,   // 4
  SVC_ALTREF2_FRAME,  // 5
  SVC_ALTREF_FRAME    // 6
};
566
567static int read_frame(struct AvxInputContext *input_ctx, aom_image_t *img) {
568 FILE *f = input_ctx->file;
569 y4m_input *y4m = &input_ctx->y4m;
570 int shortread = 0;
571
572 if (input_ctx->file_type == FILE_TYPE_Y4M) {
573 if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0;
574 } else {
575 shortread = read_yuv_frame(input_ctx, img);
576 }
577
578 return !shortread;
579}
580
581static void close_input_file(struct AvxInputContext *input) {
582 fclose(input->file);
583 if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
584}
585
586// Note: these rate control metrics assume only 1 key frame in the
587// sequence (i.e., first frame only). So for temporal pattern# 7
588// (which has key frame for every frame on base layer), the metrics
589// computation will be off/wrong.
590// TODO(marpan): Update these metrics to account for multiple key frames
591// in the stream.
592static void set_rate_control_metrics(struct RateControlMetrics *rc,
593 double framerate, int ss_number_layers,
594 int ts_number_layers) {
595 int ts_rate_decimator[AOM_MAX_TS_LAYERS] = { 1 };
596 ts_rate_decimator[0] = 1;
597 if (ts_number_layers == 2) {
598 ts_rate_decimator[0] = 2;
599 ts_rate_decimator[1] = 1;
600 }
601 if (ts_number_layers == 3) {
602 ts_rate_decimator[0] = 4;
603 ts_rate_decimator[1] = 2;
604 ts_rate_decimator[2] = 1;
605 }
606 // Set the layer (cumulative) framerate and the target layer (non-cumulative)
607 // per-frame-bandwidth, for the rate control encoding stats below.
608 for (int sl = 0; sl < ss_number_layers; ++sl) {
609 int i = sl * ts_number_layers;
610 rc->layer_framerate[0] = framerate / ts_rate_decimator[0];
611 rc->layer_pfb[i] =
612 1000.0 * rc->layer_target_bitrate[i] / rc->layer_framerate[0];
613 for (int tl = 0; tl < ts_number_layers; ++tl) {
614 i = sl * ts_number_layers + tl;
615 if (tl > 0) {
616 rc->layer_framerate[tl] = framerate / ts_rate_decimator[tl];
617 rc->layer_pfb[i] =
618 1000.0 *
619 (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
620 (rc->layer_framerate[tl] - rc->layer_framerate[tl - 1]);
621 }
622 rc->layer_input_frames[tl] = 0;
623 rc->layer_enc_frames[tl] = 0;
624 rc->layer_encoding_bitrate[i] = 0.0;
625 rc->layer_avg_frame_size[i] = 0.0;
626 rc->layer_avg_rate_mismatch[i] = 0.0;
627 }
628 }
629 rc->window_count = 0;
630 rc->window_size = 15;
631 rc->avg_st_encoding_bitrate = 0.0;
632 rc->variance_st_encoding_bitrate = 0.0;
633}
634
635static void printout_rate_control_summary(struct RateControlMetrics *rc,
636 int frame_cnt, int ss_number_layers,
637 int ts_number_layers) {
638 int tot_num_frames = 0;
639 double perc_fluctuation = 0.0;
640 printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
641 printf("Rate control layer stats for %d layer(s):\n\n", ts_number_layers);
642 for (int sl = 0; sl < ss_number_layers; ++sl) {
643 tot_num_frames = 0;
644 for (int tl = 0; tl < ts_number_layers; ++tl) {
645 int i = sl * ts_number_layers + tl;
646 const int num_dropped =
647 tl > 0 ? rc->layer_input_frames[tl] - rc->layer_enc_frames[tl]
648 : rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] - 1;
649 tot_num_frames += rc->layer_input_frames[tl];
650 rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[tl] *
651 rc->layer_encoding_bitrate[i] /
652 tot_num_frames;
653 rc->layer_avg_frame_size[i] =
654 rc->layer_avg_frame_size[i] / rc->layer_enc_frames[tl];
655 rc->layer_avg_rate_mismatch[i] =
656 100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[tl];
657 printf("For layer#: %d %d \n", sl, tl);
658 printf("Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i],
659 rc->layer_encoding_bitrate[i]);
660 printf("Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i],
661 rc->layer_avg_frame_size[i]);
662 printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]);
663 printf(
664 "Number of input frames, encoded (non-key) frames, "
665 "and perc dropped frames: %d %d %f\n",
666 rc->layer_input_frames[tl], rc->layer_enc_frames[tl],
667 100.0 * num_dropped / rc->layer_input_frames[tl]);
668 printf("\n");
669 }
670 }
671 rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
672 rc->variance_st_encoding_bitrate =
673 rc->variance_st_encoding_bitrate / rc->window_count -
674 (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
675 perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
676 rc->avg_st_encoding_bitrate;
677 printf("Short-time stats, for window of %d frames:\n", rc->window_size);
678 printf("Average, rms-variance, and percent-fluct: %f %f %f\n",
679 rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
680 perc_fluctuation);
681 if (frame_cnt - 1 != tot_num_frames)
682 die("Error: Number of input frames not equal to output!\n");
683}
684
685// Layer pattern configuration.
686static void set_layer_pattern(
687 int layering_mode, int superframe_cnt, aom_svc_layer_id_t *layer_id,
688 aom_svc_ref_frame_config_t *ref_frame_config,
689 aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int *use_svc_control,
690 int spatial_layer_id, int is_key_frame, int ksvc_mode, int speed) {
691 // Setting this flag to 1 enables simplex example of
692 // RPS (Reference Picture Selection) for 1 layer.
693 int use_rps_example = 0;
694 int i;
695 int enable_longterm_temporal_ref = 1;
696 int shift = (layering_mode == 8) ? 2 : 0;
697 int simulcast_mode = (layering_mode == 11);
698 *use_svc_control = 1;
699 layer_id->spatial_layer_id = spatial_layer_id;
700 int lag_index = 0;
701 int base_count = superframe_cnt >> 2;
702 ref_frame_comp_pred->use_comp_pred[0] = 0; // GOLDEN_LAST
703 ref_frame_comp_pred->use_comp_pred[1] = 0; // LAST2_LAST
704 ref_frame_comp_pred->use_comp_pred[2] = 0; // ALTREF_LAST
705 // Set the reference map buffer idx for the 7 references:
706 // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
707 // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
708 for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = i;
709 for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->reference[i] = 0;
710 for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
711
712 if (ksvc_mode) {
713 // Same pattern as case 9, but the reference strucutre will be constrained
714 // below.
715 layering_mode = 9;
716 }
717 switch (layering_mode) {
718 case 0:
719 if (use_rps_example == 0) {
720 // 1-layer: update LAST on every frame, reference LAST.
721 layer_id->temporal_layer_id = 0;
722 layer_id->spatial_layer_id = 0;
723 ref_frame_config->refresh[0] = 1;
724 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
725 } else {
726 // Pattern of 2 references (ALTREF and GOLDEN) trailing
727 // LAST by 4 and 8 frames, with some switching logic to
728 // sometimes only predict from the longer-term reference
729 //(golden here). This is simple example to test RPS
730 // (reference picture selection).
731 int last_idx = 0;
732 int last_idx_refresh = 0;
733 int gld_idx = 0;
734 int alt_ref_idx = 0;
735 int lag_alt = 4;
736 int lag_gld = 8;
737 layer_id->temporal_layer_id = 0;
738 layer_id->spatial_layer_id = 0;
739 int sh = 8; // slots 0 - 7.
740 // Moving index slot for last: 0 - (sh - 1)
741 if (superframe_cnt > 1) last_idx = (superframe_cnt - 1) % sh;
742 // Moving index for refresh of last: one ahead for next frame.
743 last_idx_refresh = superframe_cnt % sh;
744 // Moving index for gld_ref, lag behind current by lag_gld
745 if (superframe_cnt > lag_gld) gld_idx = (superframe_cnt - lag_gld) % sh;
746 // Moving index for alt_ref, lag behind LAST by lag_alt frames.
747 if (superframe_cnt > lag_alt)
748 alt_ref_idx = (superframe_cnt - lag_alt) % sh;
749 // Set the ref_idx.
750 // Default all references to slot for last.
751 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
752 ref_frame_config->ref_idx[i] = last_idx;
753 // Set the ref_idx for the relevant references.
754 ref_frame_config->ref_idx[SVC_LAST_FRAME] = last_idx;
755 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = last_idx_refresh;
756 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = gld_idx;
757 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = alt_ref_idx;
758 // Refresh this slot, which will become LAST on next frame.
759 ref_frame_config->refresh[last_idx_refresh] = 1;
760 // Reference LAST, ALTREF, and GOLDEN
761 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
762 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
763 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
764 // Switch to only GOLDEN every 300 frames.
765 if (superframe_cnt % 200 == 0 && superframe_cnt > 0) {
766 ref_frame_config->reference[SVC_LAST_FRAME] = 0;
767 ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
768 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
769 // Test if the long-term is LAST instead, this is just a renaming
770 // but its tests if encoder behaves the same, whether its
771 // LAST or GOLDEN.
772 if (superframe_cnt % 400 == 0 && superframe_cnt > 0) {
773 ref_frame_config->ref_idx[SVC_LAST_FRAME] = gld_idx;
774 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
775 ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
776 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
777 }
778 }
779 }
780 break;
781 case 1:
782 // 2-temporal layer.
783 // 1 3 5
784 // 0 2 4
785 // Keep golden fixed at slot 3.
786 base_count = superframe_cnt >> 1;
787 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
788 // Cyclically refresh slots 5, 6, 7, for lag alt ref.
789 lag_index = 5;
790 if (base_count > 0) {
791 lag_index = 5 + (base_count % 3);
792 if (superframe_cnt % 2 != 0) lag_index = 5 + ((base_count + 1) % 3);
793 }
794 // Set the altref slot to lag_index.
795 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
796 if (superframe_cnt % 2 == 0) {
797 layer_id->temporal_layer_id = 0;
798 // Update LAST on layer 0, reference LAST.
799 ref_frame_config->refresh[0] = 1;
800 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
801 // Refresh lag_index slot, needed for lagging golen.
802 ref_frame_config->refresh[lag_index] = 1;
803 // Refresh GOLDEN every x base layer frames.
804 if (base_count % 32 == 0) ref_frame_config->refresh[3] = 1;
805 } else {
806 layer_id->temporal_layer_id = 1;
807 // No updates on layer 1, reference LAST (TL0).
808 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
809 }
810 // Always reference golden and altref on TL0.
811 if (layer_id->temporal_layer_id == 0) {
812 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
813 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
814 }
815 break;
816 case 2:
817 // 3-temporal layer:
818 // 1 3 5 7
819 // 2 6
820 // 0 4 8
821 if (superframe_cnt % 4 == 0) {
822 // Base layer.
823 layer_id->temporal_layer_id = 0;
824 // Update LAST on layer 0, reference LAST.
825 ref_frame_config->refresh[0] = 1;
826 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
827 } else if ((superframe_cnt - 1) % 4 == 0) {
828 layer_id->temporal_layer_id = 2;
829 // First top layer: no updates, only reference LAST (TL0).
830 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
831 } else if ((superframe_cnt - 2) % 4 == 0) {
832 layer_id->temporal_layer_id = 1;
833 // Middle layer (TL1): update LAST2, only reference LAST (TL0).
834 ref_frame_config->refresh[1] = 1;
835 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
836 } else if ((superframe_cnt - 3) % 4 == 0) {
837 layer_id->temporal_layer_id = 2;
838 // Second top layer: no updates, only reference LAST.
839 // Set buffer idx for LAST to slot 1, since that was the slot
840 // updated in previous frame. So LAST is TL1 frame.
841 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
842 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
843 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
844 }
845 break;
846 case 3:
847 // 3 TL, same as above, except allow for predicting
848 // off 2 more references (GOLDEN and ALTREF), with
849 // GOLDEN updated periodically, and ALTREF lagging from
850 // LAST from ~4 frames. Both GOLDEN and ALTREF
851 // can only be updated on base temporal layer.
852
853 // Keep golden fixed at slot 3.
854 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
855 // Cyclically refresh slots 5, 6, 7, for lag altref.
856 lag_index = 5;
857 if (base_count > 0) {
858 lag_index = 5 + (base_count % 3);
859 if (superframe_cnt % 4 != 0) lag_index = 5 + ((base_count + 1) % 3);
860 }
861 // Set the altref slot to lag_index.
862 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
863 if (superframe_cnt % 4 == 0) {
864 // Base layer.
865 layer_id->temporal_layer_id = 0;
866 // Update LAST on layer 0, reference LAST.
867 ref_frame_config->refresh[0] = 1;
868 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
869 // Refresh GOLDEN every x ~10 base layer frames.
870 if (base_count % 10 == 0) ref_frame_config->refresh[3] = 1;
871 // Refresh lag_index slot, needed for lagging altref.
872 ref_frame_config->refresh[lag_index] = 1;
873 } else if ((superframe_cnt - 1) % 4 == 0) {
874 layer_id->temporal_layer_id = 2;
875 // First top layer: no updates, only reference LAST (TL0).
876 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
877 } else if ((superframe_cnt - 2) % 4 == 0) {
878 layer_id->temporal_layer_id = 1;
879 // Middle layer (TL1): update LAST2, only reference LAST (TL0).
880 ref_frame_config->refresh[1] = 1;
881 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
882 } else if ((superframe_cnt - 3) % 4 == 0) {
883 layer_id->temporal_layer_id = 2;
884 // Second top layer: no updates, only reference LAST.
885 // Set buffer idx for LAST to slot 1, since that was the slot
886 // updated in previous frame. So LAST is TL1 frame.
887 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
888 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
889 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
890 }
891 // Every frame can reference GOLDEN AND ALTREF.
892 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
893 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
894 // Allow for compound prediction for LAST-ALTREF and LAST-GOLDEN.
895 if (speed >= 7) {
896 ref_frame_comp_pred->use_comp_pred[2] = 1;
897 ref_frame_comp_pred->use_comp_pred[0] = 1;
898 }
899 break;
900 case 4:
901 // 3-temporal layer: but middle layer updates GF, so 2nd TL2 will
902 // only reference GF (not LAST). Other frames only reference LAST.
903 // 1 3 5 7
904 // 2 6
905 // 0 4 8
906 if (superframe_cnt % 4 == 0) {
907 // Base layer.
908 layer_id->temporal_layer_id = 0;
909 // Update LAST on layer 0, only reference LAST.
910 ref_frame_config->refresh[0] = 1;
911 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
912 } else if ((superframe_cnt - 1) % 4 == 0) {
913 layer_id->temporal_layer_id = 2;
914 // First top layer: no updates, only reference LAST (TL0).
915 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
916 } else if ((superframe_cnt - 2) % 4 == 0) {
917 layer_id->temporal_layer_id = 1;
918 // Middle layer (TL1): update GF, only reference LAST (TL0).
919 ref_frame_config->refresh[3] = 1;
920 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
921 } else if ((superframe_cnt - 3) % 4 == 0) {
922 layer_id->temporal_layer_id = 2;
923 // Second top layer: no updates, only reference GF.
924 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
925 }
926 break;
927 case 5:
928 // 2 spatial layers, 1 temporal.
929 layer_id->temporal_layer_id = 0;
930 if (layer_id->spatial_layer_id == 0) {
931 // Reference LAST, update LAST.
932 ref_frame_config->refresh[0] = 1;
933 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
934 } else if (layer_id->spatial_layer_id == 1) {
935 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
936 // and GOLDEN to slot 0. Update slot 1 (LAST).
937 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
938 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 0;
939 ref_frame_config->refresh[1] = 1;
940 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
941 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
942 }
943 break;
944 case 6:
945 // 3 spatial layers, 1 temporal.
946 // Note for this case, we set the buffer idx for all references to be
947 // either LAST or GOLDEN, which are always valid references, since decoder
948 // will check if any of the 7 references is valid scale in
949 // valid_ref_frame_size().
950 layer_id->temporal_layer_id = 0;
951 if (layer_id->spatial_layer_id == 0) {
952 // Reference LAST, update LAST. Set all buffer_idx to 0.
953 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
954 ref_frame_config->ref_idx[i] = 0;
955 ref_frame_config->refresh[0] = 1;
956 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
957 } else if (layer_id->spatial_layer_id == 1) {
958 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
959 // and GOLDEN (and all other refs) to slot 0.
960 // Update slot 1 (LAST).
961 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
962 ref_frame_config->ref_idx[i] = 0;
963 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
964 ref_frame_config->refresh[1] = 1;
965 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
966 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
967 } else if (layer_id->spatial_layer_id == 2) {
968 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2
969 // and GOLDEN (and all other refs) to slot 1.
970 // Update slot 2 (LAST).
971 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
972 ref_frame_config->ref_idx[i] = 1;
973 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
974 ref_frame_config->refresh[2] = 1;
975 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
976 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
977 // For 3 spatial layer case: allow for top spatial layer to use
978 // additional temporal reference. Update every 10 frames.
979 if (enable_longterm_temporal_ref) {
980 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
981 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
982 if (base_count % 10 == 0)
983 ref_frame_config->refresh[REF_FRAMES - 1] = 1;
984 }
985 }
986 break;
987 case 7:
988 // 2 spatial and 3 temporal layer.
989 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
990 if (superframe_cnt % 4 == 0) {
991 // Base temporal layer
992 layer_id->temporal_layer_id = 0;
993 if (layer_id->spatial_layer_id == 0) {
994 // Reference LAST, update LAST
995 // Set all buffer_idx to 0
996 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
997 ref_frame_config->ref_idx[i] = 0;
998 ref_frame_config->refresh[0] = 1;
999 } else if (layer_id->spatial_layer_id == 1) {
1000 // Reference LAST and GOLDEN.
1001 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1002 ref_frame_config->ref_idx[i] = 0;
1003 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1004 ref_frame_config->refresh[1] = 1;
1005 }
1006 } else if ((superframe_cnt - 1) % 4 == 0) {
1007 // First top temporal enhancement layer.
1008 layer_id->temporal_layer_id = 2;
1009 if (layer_id->spatial_layer_id == 0) {
1010 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1011 ref_frame_config->ref_idx[i] = 0;
1012 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1013 ref_frame_config->refresh[3] = 1;
1014 } else if (layer_id->spatial_layer_id == 1) {
1015 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1016 // GOLDEN (and all other refs) to slot 3.
1017 // No update.
1018 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1019 ref_frame_config->ref_idx[i] = 3;
1020 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1021 }
1022 } else if ((superframe_cnt - 2) % 4 == 0) {
1023 // Middle temporal enhancement layer.
1024 layer_id->temporal_layer_id = 1;
1025 if (layer_id->spatial_layer_id == 0) {
1026 // Reference LAST.
1027 // Set all buffer_idx to 0.
1028 // Set GOLDEN to slot 5 and update slot 5.
1029 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1030 ref_frame_config->ref_idx[i] = 0;
1031 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1032 ref_frame_config->refresh[5 - shift] = 1;
1033 } else if (layer_id->spatial_layer_id == 1) {
1034 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1035 // GOLDEN (and all other refs) to slot 5.
1036 // Set LAST3 to slot 6 and update slot 6.
1037 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1038 ref_frame_config->ref_idx[i] = 5 - shift;
1039 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1040 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1041 ref_frame_config->refresh[6 - shift] = 1;
1042 }
1043 } else if ((superframe_cnt - 3) % 4 == 0) {
1044 // Second top temporal enhancement layer.
1045 layer_id->temporal_layer_id = 2;
1046 if (layer_id->spatial_layer_id == 0) {
1047 // Set LAST to slot 5 and reference LAST.
1048 // Set GOLDEN to slot 3 and update slot 3.
1049 // Set all other buffer_idx to 0.
1050 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1051 ref_frame_config->ref_idx[i] = 0;
1052 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1053 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1054 ref_frame_config->refresh[3] = 1;
1055 } else if (layer_id->spatial_layer_id == 1) {
1056 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1057 // GOLDEN to slot 3. No update.
1058 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1059 ref_frame_config->ref_idx[i] = 0;
1060 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1061 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1062 }
1063 }
1064 break;
1065 case 8:
1066 // 3 spatial and 3 temporal layer.
1067 // Same as case 9 but with overlap in the buffer slot updates
1068 // (shift = 2). The slots 3 and 4 updated by first TL2 are
1069 // reused for update in TL1 superframe.
1070 // Note for this case, frame order hint must be disabled for
1071 // lower resolutions (operating points > 0) to be decodable.
1072 case 9:
1073 // 3 spatial and 3 temporal layer.
1074 // No overlap in buffer updates between TL2 and TL1.
1075 // TL2 updates slot 3 and 4, TL1 updates 5, 6, 7.
1076 // Set the references via the svc_ref_frame_config control.
1077 // Always reference LAST.
1078 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1079 if (superframe_cnt % 4 == 0) {
1080 // Base temporal layer.
1081 layer_id->temporal_layer_id = 0;
1082 if (layer_id->spatial_layer_id == 0) {
1083 // Reference LAST, update LAST.
1084 // Set all buffer_idx to 0.
1085 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1086 ref_frame_config->ref_idx[i] = 0;
1087 ref_frame_config->refresh[0] = 1;
1088 } else if (layer_id->spatial_layer_id == 1) {
1089 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1090 // GOLDEN (and all other refs) to slot 0.
1091 // Update slot 1 (LAST).
1092 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1093 ref_frame_config->ref_idx[i] = 0;
1094 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1095 ref_frame_config->refresh[1] = 1;
1096 } else if (layer_id->spatial_layer_id == 2) {
1097 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1098 // GOLDEN (and all other refs) to slot 1.
1099 // Update slot 2 (LAST).
1100 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1101 ref_frame_config->ref_idx[i] = 1;
1102 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1103 ref_frame_config->refresh[2] = 1;
1104 }
1105 } else if ((superframe_cnt - 1) % 4 == 0) {
1106 // First top temporal enhancement layer.
1107 layer_id->temporal_layer_id = 2;
1108 if (layer_id->spatial_layer_id == 0) {
1109 // Reference LAST (slot 0).
1110 // Set GOLDEN to slot 3 and update slot 3.
1111 // Set all other buffer_idx to slot 0.
1112 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1113 ref_frame_config->ref_idx[i] = 0;
1114 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1115 ref_frame_config->refresh[3] = 1;
1116 } else if (layer_id->spatial_layer_id == 1) {
1117 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1118 // GOLDEN (and all other refs) to slot 3.
1119 // Set LAST2 to slot 4 and Update slot 4.
1120 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1121 ref_frame_config->ref_idx[i] = 3;
1122 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1123 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1124 ref_frame_config->refresh[4] = 1;
1125 } else if (layer_id->spatial_layer_id == 2) {
1126 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1127 // GOLDEN (and all other refs) to slot 4.
1128 // No update.
1129 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1130 ref_frame_config->ref_idx[i] = 4;
1131 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1132 }
1133 } else if ((superframe_cnt - 2) % 4 == 0) {
1134 // Middle temporal enhancement layer.
1135 layer_id->temporal_layer_id = 1;
1136 if (layer_id->spatial_layer_id == 0) {
1137 // Reference LAST.
1138 // Set all buffer_idx to 0.
1139 // Set GOLDEN to slot 5 and update slot 5.
1140 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1141 ref_frame_config->ref_idx[i] = 0;
1142 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1143 ref_frame_config->refresh[5 - shift] = 1;
1144 } else if (layer_id->spatial_layer_id == 1) {
1145 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1146 // GOLDEN (and all other refs) to slot 5.
1147 // Set LAST3 to slot 6 and update slot 6.
1148 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1149 ref_frame_config->ref_idx[i] = 5 - shift;
1150 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1151 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1152 ref_frame_config->refresh[6 - shift] = 1;
1153 } else if (layer_id->spatial_layer_id == 2) {
1154 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1155 // GOLDEN (and all other refs) to slot 6.
1156 // Set LAST3 to slot 7 and update slot 7.
1157 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1158 ref_frame_config->ref_idx[i] = 6 - shift;
1159 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1160 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 7 - shift;
1161 ref_frame_config->refresh[7 - shift] = 1;
1162 }
1163 } else if ((superframe_cnt - 3) % 4 == 0) {
1164 // Second top temporal enhancement layer.
1165 layer_id->temporal_layer_id = 2;
1166 if (layer_id->spatial_layer_id == 0) {
1167 // Set LAST to slot 5 and reference LAST.
1168 // Set GOLDEN to slot 3 and update slot 3.
1169 // Set all other buffer_idx to 0.
1170 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1171 ref_frame_config->ref_idx[i] = 0;
1172 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1173 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1174 ref_frame_config->refresh[3] = 1;
1175 } else if (layer_id->spatial_layer_id == 1) {
1176 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1177 // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4.
1178 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1179 ref_frame_config->ref_idx[i] = 0;
1180 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1181 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1182 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1183 ref_frame_config->refresh[4] = 1;
1184 } else if (layer_id->spatial_layer_id == 2) {
1185 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7,
1186 // GOLDEN to slot 4. No update.
1187 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1188 ref_frame_config->ref_idx[i] = 0;
1189 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 7 - shift;
1190 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 4;
1191 }
1192 }
1193 break;
1194 case 11:
1195 // Simulcast mode for 3 spatial and 3 temporal layers.
1196 // No inter-layer prediction, only prediction is temporal and single
1197 // reference (LAST).
1198 // No overlap in buffer slots between spatial layers. So for example,
1199 // SL0 only uses slots 0 and 1.
1200 // SL1 only uses slots 2 and 3.
1201 // SL2 only uses slots 4 and 5.
1202 // All 7 references for each inter-frame must only access buffer slots
1203 // for that spatial layer.
1204 // On key (super)frames: SL1 and SL2 must have no references set
1205 // and must refresh all the slots for that layer only (so 2 and 3
1206 // for SL1, 4 and 5 for SL2). The base SL0 will be labelled internally
1207 // as a Key frame (refresh all slots). SL1/SL2 will be labelled
1208 // internally as Intra-only frames that allow that stream to be decoded.
1209 // These conditions will allow for each spatial stream to be
1210 // independently decodeable.
1211
1212 // Initialize all references to 0 (don't use reference).
1213 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1214 ref_frame_config->reference[i] = 0;
1215 // Initialize as no refresh/update for all slots.
1216 for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
1217 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1218 ref_frame_config->ref_idx[i] = 0;
1219
1220 if (is_key_frame) {
1221 if (layer_id->spatial_layer_id == 0) {
1222 // Assign LAST/GOLDEN to slot 0/1.
1223 // Refresh slots 0 and 1 for SL0.
1224 // SL0: this will get set to KEY frame internally.
1225 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1226 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 1;
1227 ref_frame_config->refresh[0] = 1;
1228 ref_frame_config->refresh[1] = 1;
1229 } else if (layer_id->spatial_layer_id == 1) {
1230 // Assign LAST/GOLDEN to slot 2/3.
1231 // Refresh slots 2 and 3 for SL1.
1232 // This will get set to Intra-only frame internally.
1233 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1234 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1235 ref_frame_config->refresh[2] = 1;
1236 ref_frame_config->refresh[3] = 1;
1237 } else if (layer_id->spatial_layer_id == 2) {
1238 // Assign LAST/GOLDEN to slot 4/5.
1239 // Refresh slots 4 and 5 for SL2.
1240 // This will get set to Intra-only frame internally.
1241 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1242 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5;
1243 ref_frame_config->refresh[4] = 1;
1244 ref_frame_config->refresh[5] = 1;
1245 }
1246 } else if (superframe_cnt % 4 == 0) {
1247 // Base temporal layer: TL0
1248 layer_id->temporal_layer_id = 0;
1249 if (layer_id->spatial_layer_id == 0) { // SL0
1250 // Reference LAST. Assign all references to either slot
1251 // 0 or 1. Here we assign LAST to slot 0, all others to 1.
1252 // Update slot 0 (LAST).
1253 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1254 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1255 ref_frame_config->ref_idx[i] = 1;
1256 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1257 ref_frame_config->refresh[0] = 1;
1258 } else if (layer_id->spatial_layer_id == 1) { // SL1
1259 // Reference LAST. Assign all references to either slot
1260 // 2 or 3. Here we assign LAST to slot 2, all others to 3.
1261 // Update slot 2 (LAST).
1262 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1263 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1264 ref_frame_config->ref_idx[i] = 3;
1265 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1266 ref_frame_config->refresh[2] = 1;
1267 } else if (layer_id->spatial_layer_id == 2) { // SL2
1268 // Reference LAST. Assign all references to either slot
1269 // 4 or 5. Here we assign LAST to slot 4, all others to 5.
1270 // Update slot 4 (LAST).
1271 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1272 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1273 ref_frame_config->ref_idx[i] = 5;
1274 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1275 ref_frame_config->refresh[4] = 1;
1276 }
1277 } else if ((superframe_cnt - 1) % 4 == 0) {
1278 // First top temporal enhancement layer: TL2
1279 layer_id->temporal_layer_id = 2;
1280 if (layer_id->spatial_layer_id == 0) { // SL0
1281 // Reference LAST (slot 0). Assign other references to slot 1.
1282 // No update/refresh on any slots.
1283 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1284 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1285 ref_frame_config->ref_idx[i] = 1;
1286 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1287 } else if (layer_id->spatial_layer_id == 1) { // SL1
1288 // Reference LAST (slot 2). Assign other references to slot 3.
1289 // No update/refresh on any slots.
1290 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1291 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1292 ref_frame_config->ref_idx[i] = 3;
1293 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1294 } else if (layer_id->spatial_layer_id == 2) { // SL2
1295 // Reference LAST (slot 4). Assign other references to slot 5.
1296 // No update/refresh on any slots.
1297 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1298 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1299 ref_frame_config->ref_idx[i] = 5;
1300 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1301 }
1302 } else if ((superframe_cnt - 2) % 4 == 0) {
1303 // Middle temporal enhancement layer: TL1
1304 layer_id->temporal_layer_id = 1;
1305 if (layer_id->spatial_layer_id == 0) { // SL0
1306 // Reference LAST (slot 0).
1307 // Set GOLDEN to slot 1 and update slot 1.
1308 // This will be used as reference for next TL2.
1309 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1310 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1311 ref_frame_config->ref_idx[i] = 1;
1312 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1313 ref_frame_config->refresh[1] = 1;
1314 } else if (layer_id->spatial_layer_id == 1) { // SL1
1315 // Reference LAST (slot 2).
1316 // Set GOLDEN to slot 3 and update slot 3.
1317 // This will be used as reference for next TL2.
1318 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1319 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1320 ref_frame_config->ref_idx[i] = 3;
1321 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1322 ref_frame_config->refresh[3] = 1;
1323 } else if (layer_id->spatial_layer_id == 2) { // SL2
1324 // Reference LAST (slot 4).
1325 // Set GOLDEN to slot 5 and update slot 5.
1326 // This will be used as reference for next TL2.
1327 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1328 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1329 ref_frame_config->ref_idx[i] = 5;
1330 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1331 ref_frame_config->refresh[5] = 1;
1332 }
1333 } else if ((superframe_cnt - 3) % 4 == 0) {
1334 // Second top temporal enhancement layer: TL2
1335 layer_id->temporal_layer_id = 2;
1336 if (layer_id->spatial_layer_id == 0) { // SL0
1337 // Reference LAST (slot 1). Assign other references to slot 0.
1338 // No update/refresh on any slots.
1339 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1340 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1341 ref_frame_config->ref_idx[i] = 0;
1342 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1343 } else if (layer_id->spatial_layer_id == 1) { // SL1
1344 // Reference LAST (slot 3). Assign other references to slot 2.
1345 // No update/refresh on any slots.
1346 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1347 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1348 ref_frame_config->ref_idx[i] = 2;
1349 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 3;
1350 } else if (layer_id->spatial_layer_id == 2) { // SL2
1351 // Reference LAST (slot 5). Assign other references to slot 4.
1352 // No update/refresh on any slots.
1353 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1354 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1355 ref_frame_config->ref_idx[i] = 4;
1356 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5;
1357 }
1358 }
1359 if (!simulcast_mode && layer_id->spatial_layer_id > 0) {
1360 // Always reference GOLDEN (inter-layer prediction).
1361 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
1362 if (ksvc_mode) {
1363 // KSVC: only keep the inter-layer reference (GOLDEN) for
1364 // superframes whose base is key.
1365 if (!is_key_frame) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
1366 }
1367 if (is_key_frame && layer_id->spatial_layer_id > 1) {
1368 // On superframes whose base is key: remove LAST to avoid prediction
1369 // off layer two levels below.
1370 ref_frame_config->reference[SVC_LAST_FRAME] = 0;
1371 }
1372 }
1373 // For 3 spatial layer case 8 (where there is free buffer slot):
1374 // allow for top spatial layer to use additional temporal reference.
1375 // Additional reference is only updated on base temporal layer, every
1376 // 10 TL0 frames here.
1377 if (!simulcast_mode && enable_longterm_temporal_ref &&
1378 layer_id->spatial_layer_id == 2 && layering_mode == 8) {
1379 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
1380 if (!is_key_frame) ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
1381 if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0)
1382 ref_frame_config->refresh[REF_FRAMES - 1] = 1;
1383 }
1384 break;
1385 default: assert(0); die("Error: Unsupported temporal layering mode!\n");
1386 }
1387}
1388
1389static void write_literal(struct aom_write_bit_buffer *wb, int data, int bits,
1390 int offset = 0) {
1391 const int to_write = data - offset;
1392 if (to_write < 0 || to_write >= (1 << bits)) {
1393 die("Invalid data, value %d out of range [%d, %d]\n", data, offset,
1394 offset + (1 << bits) - 1);
1395 }
1396 aom_wb_write_literal(wb, to_write, bits);
1397}
1398
1399static void write_depth_representation_element(
1400 struct aom_write_bit_buffer *buffer,
1401 const std::pair<libaom_examples::DepthRepresentationElement, bool>
1402 &element) {
1403 if (!element.second) {
1404 return;
1405 }
1406 write_literal(buffer, element.first.sign_flag, 1);
1407 write_literal(buffer, element.first.exponent, 7);
1408 int mantissa_len = 1;
1409 while (mantissa_len < 32 && (element.first.mantissa >> mantissa_len != 0)) {
1410 ++mantissa_len;
1411 }
1412 write_literal(buffer, mantissa_len - 1, 5);
1413 write_literal(buffer, element.first.mantissa, mantissa_len);
1414}
1415
1416static void write_color_properties(
1417 struct aom_write_bit_buffer *buffer,
1418 const std::pair<libaom_examples::ColorProperties, bool> &color_properties) {
1419 write_literal(buffer, color_properties.second, 1);
1420 if (color_properties.second) {
1421 write_literal(buffer, color_properties.first.color_range, 1);
1422 write_literal(buffer, color_properties.first.color_primaries, 8);
1423 write_literal(buffer, color_properties.first.transfer_characteristics, 8);
1424 write_literal(buffer, color_properties.first.matrix_coefficients, 8);
1425 } else {
1426 write_literal(buffer, 0, 1); // reserved_1bit
1427 }
1428}
1429
1430static void add_multilayer_metadata(
1431 aom_image_t *frame, const libaom_examples::MultilayerMetadata &multilayer) {
1432 // Pretty large buffer to accommodate the largest multilayer metadata
1433 // possible, with 4 alpha segmentation layers (each can be up to about 66kB).
1434 std::vector<uint8_t> data(66000 * multilayer.layers.size());
1435 struct aom_write_bit_buffer buffer = { data.data(), 0 };
1436
1437 write_literal(&buffer, multilayer.use_case, 6);
1438 if (multilayer.layers.empty()) {
1439 die("Invalid multilayer metadata, no layers found\n");
1440 } else if (multilayer.layers.size() > MAX_NUM_SPATIAL_LAYERS) {
1441 die("Invalid multilayer metadata, too many layers (max is %d)\n",
1442 MAX_NUM_SPATIAL_LAYERS);
1443 }
1444 write_literal(&buffer, (int)multilayer.layers.size() - 1, 2);
1445 assert(buffer.bit_offset % 8 == 0);
1446 for (size_t i = 0; i < multilayer.layers.size(); ++i) {
1447 const libaom_examples::LayerMetadata &layer = multilayer.layers[i];
1448 // Alpha info with segmentation with labels can be up to about 66k bytes,
1449 // which requires 3 bytes to encode in leb128.
1450 const int bytes_reserved_for_size = 3;
1451 // Placeholder for layer_metadata_size which will be written later.
1452 write_literal(&buffer, 0, bytes_reserved_for_size * 8);
1453 const uint32_t metadata_start = buffer.bit_offset;
1454 write_literal(&buffer, (int)i, 2); // ml_spatial_id
1455 write_literal(&buffer, layer.layer_type, 5);
1456 write_literal(&buffer, layer.luma_plane_only_flag, 1);
1457 write_literal(&buffer, layer.layer_view_type, 3);
1458 write_literal(&buffer, layer.group_id, 2);
1459 write_literal(&buffer, layer.layer_dependency_idc, 3);
1460 write_literal(&buffer, layer.layer_metadata_scope, 2);
1461 write_literal(&buffer, 0, 4); // ml_reserved_4bits
1462
1463 if (i > 0) {
1464 write_color_properties(&buffer, layer.layer_color_description);
1465 } else {
1466 write_literal(&buffer, 0, 2); // ml_reserved_2bits
1467 }
1468 assert(buffer.bit_offset % 8 == 0);
1469
1470 if (multilayer.use_case < 12) {
1471 if (layer.layer_type == libaom_examples::MULTIALYER_LAYER_TYPE_ALPHA &&
1472 layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
1473 const libaom_examples::AlphaInformation &alpha_info =
1474 layer.global_alpha_info;
1475 write_literal(&buffer, alpha_info.alpha_use_idc, 3);
1476 write_literal(&buffer, alpha_info.alpha_bit_depth, 3, /*offset=*/8);
1477 write_literal(&buffer, alpha_info.alpha_clip_idc, 2);
1478 write_literal(&buffer, alpha_info.alpha_incr_flag, 1);
1479 write_literal(&buffer, alpha_info.alpha_transparent_value,
1480 alpha_info.alpha_bit_depth);
1481 write_literal(&buffer, alpha_info.alpha_opaque_value,
1482 alpha_info.alpha_bit_depth);
1483 if (buffer.bit_offset % 8 != 0) {
1484 // ai_byte_alignment_bits
1485 write_literal(&buffer, 0, 8 - (buffer.bit_offset % 8));
1486 }
1487 assert(buffer.bit_offset % 8 == 0);
1488
1489 if (alpha_info.alpha_use_idc == libaom_examples::ALPHA_STRAIGHT) {
1490 write_literal(&buffer, 0, 6); // ai_reserved_6bits
1491 write_color_properties(&buffer, alpha_info.alpha_color_description);
1492 } else if (alpha_info.alpha_use_idc ==
1493 libaom_examples::ALPHA_SEGMENTATION) {
1494 write_literal(&buffer, 0, 7); // ai_reserved_7bits
1495 write_literal(&buffer, !alpha_info.label_type_id.empty(), 1);
1496 if (!alpha_info.label_type_id.empty()) {
1497 const size_t num_values =
1498 std::abs(alpha_info.alpha_transparent_value -
1499 alpha_info.alpha_opaque_value) +
1500 1;
1501 if (!alpha_info.label_type_id.empty() &&
1502 alpha_info.label_type_id.size() != num_values) {
1503 die("Invalid multilayer metadata, label_type_id size must be "
1504 "equal to the range of alpha values between "
1505 "alpha_transparent_value and alpha_opaque_value (expected "
1506 "%d values, found %d values)\n",
1507 (int)num_values, (int)alpha_info.label_type_id.size());
1508 }
1509 for (size_t j = 0; j < num_values; ++j) {
1510 write_literal(&buffer, alpha_info.label_type_id[j], 16);
1511 }
1512 }
1513 }
1514 assert(buffer.bit_offset % 8 == 0);
1515 } else if (layer.layer_type ==
1516 libaom_examples::MULTIALYER_LAYER_TYPE_DEPTH &&
1517 layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
1518 const libaom_examples::DepthInformation &depth_info =
1519 layer.global_depth_info;
1520 write_literal(&buffer, depth_info.z_near.second, 1);
1521 write_literal(&buffer, depth_info.z_far.second, 1);
1522 write_literal(&buffer, depth_info.d_min.second, 1);
1523 write_literal(&buffer, depth_info.d_max.second, 1);
1524 write_literal(&buffer, depth_info.depth_representation_type, 4);
1525 if (depth_info.d_min.second || depth_info.d_max.second) {
1526 write_literal(&buffer, depth_info.disparity_ref_view_id, 2);
1527 }
1528 write_depth_representation_element(&buffer, depth_info.z_near);
1529 write_depth_representation_element(&buffer, depth_info.z_far);
1530 write_depth_representation_element(&buffer, depth_info.d_min);
1531 write_depth_representation_element(&buffer, depth_info.d_max);
1532 if (depth_info.depth_representation_type == 3) {
1533 write_literal(&buffer, depth_info.depth_nonlinear_precision, 4,
1534 /*offset=*/8);
1535 if (depth_info.depth_nonlinear_representation_model.empty() ||
1536 depth_info.depth_nonlinear_representation_model.size() >
1537 (1 << 6)) {
1538 die("Invalid multilayer metadata, if depth_nonlinear_precision "
1539 "== 3, depth_nonlinear_representation_model must have 1 to "
1540 "%d elements, found %d elements\n",
1541 1 << 6,
1542 (int)depth_info.depth_nonlinear_representation_model.size());
1543 }
1544 write_literal(
1545 &buffer,
1546 (int)depth_info.depth_nonlinear_representation_model.size() - 1,
1547 6);
1548 const int bit_depth =
1549 depth_info.depth_nonlinear_precision + 8; // XXX + 9 ???
1550 for (const uint32_t v :
1551 depth_info.depth_nonlinear_representation_model) {
1552 write_literal(&buffer, v, bit_depth);
1553 }
1554 }
1555 if (buffer.bit_offset % 8 != 0) {
1556 write_literal(&buffer, 0, 8 - (buffer.bit_offset % 8));
1557 }
1558 assert(buffer.bit_offset % 8 == 0);
1559 }
1560 }
1561 assert(buffer.bit_offset % 8 == 0);
1562
1563 const int metadata_size_bytes = (buffer.bit_offset - metadata_start) / 8;
1564 const uint8_t size_pos = metadata_start / 8 - bytes_reserved_for_size;
1565 size_t coded_size;
1566 if (aom_uleb_encode_fixed_size(metadata_size_bytes, bytes_reserved_for_size,
1567 bytes_reserved_for_size,
1568 &buffer.bit_buffer[size_pos], &coded_size)) {
1569 // Need to increase bytes_reserved_for_size in the code above.
1570 die("Error: Failed to write metadata size\n");
1571 }
1572 }
1573 assert(buffer.bit_offset % 8 == 0);
1574 if (aom_img_add_metadata(frame, 33 /*METADATA_TYPE_MULTILAYER*/,
1575 buffer.bit_buffer, buffer.bit_offset / 8,
1577 die("Error: Failed to add metadata\n");
1578 }
1579}
1580
1581#if CONFIG_AV1_DECODER
// Compares the encoder's and the decoder's view of the newest frame and
// returns 1 when they differ, 0 otherwise.
//   encoder, decoder: the two codec contexts being cross-checked.
//   frames_out:       frame number, used only in the mismatch message.
// On a mismatch the coordinates/values found by aom_find_mismatch{,_high} are
// printed to stderr. Both images are released with aom_img_free() before
// returning.
// NOTE(review): the statements that fill enc_img/dec_img and the first lines
// of the two allocation calls are not visible in this listing — confirm
// against the full file.
1584static int test_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder,
 1585 const int frames_out) {
 1586 aom_image_t enc_img, dec_img;
 1587 int mismatch = 0;
 1588
 1589 /* Get the internal new frame */
 1592
 1593#if CONFIG_AV1_HIGHBITDEPTH
// Only when exactly one of the two images is high bitdepth: the high bitdepth
// one is converted with aom_img_truncate_16_to_8() into a freshly set up image
// whose format has the AOM_IMG_FMT_HIGHBITDEPTH bit removed.
 1594 if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) !=
 1595 (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) {
 1596 if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
 1597 aom_image_t enc_hbd_img;
 1599 &enc_hbd_img,
 1600 static_cast<aom_img_fmt_t>(enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
 1601 enc_img.d_w, enc_img.d_h, 16);
 1602 aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
 1603 enc_img = enc_hbd_img;
 1604 }
 1605 if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
 1606 aom_image_t dec_hbd_img;
 1608 &dec_hbd_img,
 1609 static_cast<aom_img_fmt_t>(dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
 1610 dec_img.d_w, dec_img.d_h, 16);
 1611 aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
 1612 dec_img = dec_hbd_img;
 1613 }
 1614 }
 1615#endif
 1616
// aom_compare_img() returning zero is treated as "images differ".
 1617 if (!aom_compare_img(&enc_img, &dec_img)) {
// Each array receives four values that are printed below as
// [%d, %d] {%d/%d} for the Y, U and V planes respectively.
 1618 int y[4], u[4], v[4];
 1619#if CONFIG_AV1_HIGHBITDEPTH
 1620 if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
 1621 aom_find_mismatch_high(&enc_img, &dec_img, y, u, v);
 1622 } else {
 1623 aom_find_mismatch(&enc_img, &dec_img, y, u, v);
 1624 }
 1625#else
 1626 aom_find_mismatch(&enc_img, &dec_img, y, u, v);
 1627#endif
 1628 fprintf(stderr,
 1629 "Encode/decode mismatch on frame %d at"
 1630 " Y[%d, %d] {%d/%d},"
 1631 " U[%d, %d] {%d/%d},"
 1632 " V[%d, %d] {%d/%d}\n",
 1633 frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0],
 1634 v[1], v[2], v[3]);
 1635 mismatch = 1;
 1636 }
 1637
 1638 aom_img_free(&enc_img);
 1639 aom_img_free(&dec_img);
 1640 return mismatch;
 1641}
1642#endif // CONFIG_AV1_DECODER
1643
// Running PSNR accumulators for the encoded stream. Every array has two
// slots; slot 1 is set aside for high bitdepth.
struct psnr_stats {
  // Sum of the squared-error values reported by the PSNR packets.
  uint64_t psnr_sse_total[2];
  // Sum of the sample counts reported by the PSNR packets.
  uint64_t psnr_samples_total[2];
  // Sum of the four PSNR values carried by each PSNR packet.
  double psnr_totals[2][4];
  // Number of PSNR packets accumulated.
  int psnr_count[2];
};
1651
1652static void show_psnr(struct psnr_stats *psnr_stream, double peak) {
1653 double ovpsnr;
1654
1655 if (!psnr_stream->psnr_count[0]) return;
1656
1657 fprintf(stderr, "\nPSNR (Overall/Avg/Y/U/V)");
1658 ovpsnr = sse_to_psnr((double)psnr_stream->psnr_samples_total[0], peak,
1659 (double)psnr_stream->psnr_sse_total[0]);
1660 fprintf(stderr, " %.3f", ovpsnr);
1661
1662 for (int i = 0; i < 4; i++) {
1663 fprintf(stderr, " %.3f",
1664 psnr_stream->psnr_totals[0][i] / psnr_stream->psnr_count[0]);
1665 }
1666 fprintf(stderr, "\n");
1667}
1668
1669static aom::AV1RateControlRtcConfig create_rtc_rc_config(
1670 const aom_codec_enc_cfg_t &cfg, const AppInput &app_input) {
1671 aom::AV1RateControlRtcConfig rc_cfg;
1672 rc_cfg.width = cfg.g_w;
1673 rc_cfg.height = cfg.g_h;
1674 rc_cfg.max_quantizer = cfg.rc_max_quantizer;
1675 rc_cfg.min_quantizer = cfg.rc_min_quantizer;
1676 rc_cfg.target_bandwidth = cfg.rc_target_bitrate;
1677 rc_cfg.buf_initial_sz = cfg.rc_buf_initial_sz;
1678 rc_cfg.buf_optimal_sz = cfg.rc_buf_optimal_sz;
1679 rc_cfg.buf_sz = cfg.rc_buf_sz;
1680 rc_cfg.overshoot_pct = cfg.rc_overshoot_pct;
1681 rc_cfg.undershoot_pct = cfg.rc_undershoot_pct;
1682 // This is hardcoded as AOME_SET_MAX_INTRA_BITRATE_PCT
1683 rc_cfg.max_intra_bitrate_pct = 300;
1684 rc_cfg.framerate = cfg.g_timebase.den;
1685 // TODO(jianj): Add suppor for SVC.
1686 rc_cfg.ss_number_layers = 1;
1687 rc_cfg.ts_number_layers = 1;
1688 rc_cfg.scaling_factor_num[0] = 1;
1689 rc_cfg.scaling_factor_den[0] = 1;
1690 rc_cfg.layer_target_bitrate[0] = static_cast<int>(rc_cfg.target_bandwidth);
1691 rc_cfg.max_quantizers[0] = rc_cfg.max_quantizer;
1692 rc_cfg.min_quantizers[0] = rc_cfg.min_quantizer;
1693 rc_cfg.aq_mode = app_input.aq_mode;
1694
1695 return rc_cfg;
1696}
1697
// Maps an internal qindex (0-255 range) back to the external quantizer (0-63
// range). The result is the smallest quantizer whose table entry is at least
// |qindex|; 63 is returned when |qindex| is above every entry.
static int qindex_to_quantizer(int qindex) {
  // Entry q holds the qindex used internally for external quantizer q.
  static const int kQindexOfQuantizer[64] = {
    0,   4,   8,   12,  16,  20,  24,  28,  32,  36,  40,  44,  48,
    52,  56,  60,  64,  68,  72,  76,  80,  84,  88,  92,  96,  100,
    104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
    156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
    208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255,
  };
  const int last_quantizer =
      (int)(sizeof(kQindexOfQuantizer) / sizeof(kQindexOfQuantizer[0])) - 1;

  // Walk up the table until an entry reaches |qindex| or the top is hit.
  int candidate = 0;
  while (candidate < last_quantizer &&
         kQindexOfQuantizer[candidate] < qindex) {
    ++candidate;
  }
  return candidate;
}
1713
1714static void set_active_map(const aom_codec_enc_cfg_t *cfg,
1715 aom_codec_ctx_t *codec, int frame_cnt) {
1716 aom_active_map_t map = { 0, 0, 0 };
1717
1718 map.rows = (cfg->g_h + 15) / 16;
1719 map.cols = (cfg->g_w + 15) / 16;
1720
1721 map.active_map = (uint8_t *)malloc(map.rows * map.cols);
1722 if (!map.active_map) die("Failed to allocate active map");
1723
1724 // Example map for testing.
1725 for (unsigned int i = 0; i < map.rows; ++i) {
1726 for (unsigned int j = 0; j < map.cols; ++j) {
1727 int index = map.cols * i + j;
1728 map.active_map[index] = 1;
1729 if (frame_cnt < 300) {
1730 if (i < map.rows / 2 && j < map.cols / 2) map.active_map[index] = 0;
1731 } else if (frame_cnt >= 300) {
1732 if (i < map.rows / 2 && j >= map.cols / 2) map.active_map[index] = 0;
1733 }
1734 }
1735 }
1736
1737 if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map))
1738 die_codec(codec, "Failed to set active map");
1739
1740 free(map.active_map);
1741}
1742
// Entry point of the SVC RTC encoder example. Parses the command line, checks
// the layer counts and the total target bitrate, opens the per-layer and the
// combined output files, initializes the encoder (and the decoder when
// app_input.decode is set), then encodes every input frame once per spatial
// layer while writing the packets and accumulating rate-control, timing and
// PSNR statistics. Afterwards it prints the summaries, destroys the codecs,
// closes the video writers and returns EXIT_SUCCESS. Failures terminate the
// program through die()/die_codec()/fatal().
// NOTE(review): several statements appear to be missing from this listing
// (e.g. the declarations of cfg, encoder and res are not visible, and a number
// of calls show only their trailing arguments) — confirm against the full
// file before relying on any comment below.
1743int main(int argc, const char **argv) {
 1744 AppInput app_input;
 1745 AvxVideoWriter *outfile[AOM_MAX_LAYERS] = { NULL };
 1746 FILE *obu_files[AOM_MAX_LAYERS] = { NULL };
 1747 AvxVideoWriter *total_layer_file = NULL;
 1748 FILE *total_layer_obu_file = NULL;
 1750 int frame_cnt = 0;
 1751 aom_image_t raw;
 1752 int frame_avail;
 1753 int got_data = 0;
 1754 int flags = 0;
 1755 int i;
 1756 int pts = 0; // PTS starts at 0.
 1757 int frame_duration = 1; // 1 timebase tick per frame.
 1758 aom_svc_layer_id_t layer_id;
 1759 aom_svc_params_t svc_params;
 1760 aom_svc_ref_frame_config_t ref_frame_config;
 1761 aom_svc_ref_frame_comp_pred_t ref_frame_comp_pred;
 1762
 1763#if CONFIG_INTERNAL_STATS
 1764 FILE *stats_file = fopen("opsnr.stt", "a");
 1765 if (stats_file == NULL) {
 1766 die("Cannot open opsnr.stt\n");
 1767 }
 1768#endif
 1769#if CONFIG_AV1_DECODER
 1770 aom_codec_ctx_t decoder;
 1771#endif
 1772
 1773 struct RateControlMetrics rc;
 1774 int64_t cx_time = 0;
 1775 int64_t cx_time_layer[AOM_MAX_LAYERS]; // max number of layers.
 1776 int frame_cnt_layer[AOM_MAX_LAYERS];
 1777 double sum_bitrate = 0.0;
 1778 double sum_bitrate2 = 0.0;
 1779 double framerate = 30.0;
 1780 int use_svc_control = 1;
 1781 int set_err_resil_frame = 0;
 1782 int test_changing_bitrate = 0;
 1783 zero(rc.layer_target_bitrate);
 1784 memset(&layer_id, 0, sizeof(aom_svc_layer_id_t));
 1785 memset(&app_input, 0, sizeof(AppInput));
 1786 memset(&svc_params, 0, sizeof(svc_params));
 1787
 1788 // Flag to test dynamic scaling of source frames for single
 1789 // spatial stream, using the scaling_mode control.
 1790 const int test_dynamic_scaling_single_layer = 0;
 1791
 1792 // Flag to test setting speed per layer.
 1793 const int test_speed_per_layer = 0;
 1794
 1795 // Flag for testing active maps.
 1796 const int test_active_maps = 0;
 1797
 1798 /* Setup default input stream settings */
 1799 for (i = 0; i < MAX_NUM_SPATIAL_LAYERS; ++i) {
 1800 app_input.input_ctx[i].framerate.numerator = 30;
 1801 app_input.input_ctx[i].framerate.denominator = 1;
 1802 app_input.input_ctx[i].only_i420 = 0;
 1803 app_input.input_ctx[i].bit_depth = AOM_BITS_8;
 1804 }
 1805 app_input.speed = 7;
 1806 exec_name = argv[0];
 1807
 1808 // start with default encoder configuration
 1811 if (res != AOM_CODEC_OK) {
 1812 die("Failed to get config: %s\n", aom_codec_err_to_string(res));
 1813 }
 1814
 1815 // Real time parameters.
 1817
 1818 cfg.rc_end_usage = AOM_CBR;
 1819 cfg.rc_min_quantizer = 2;
 1820 cfg.rc_max_quantizer = 52;
 1821 cfg.rc_undershoot_pct = 50;
 1822 cfg.rc_overshoot_pct = 50;
 1823 cfg.rc_buf_initial_sz = 600;
 1824 cfg.rc_buf_optimal_sz = 600;
 1825 cfg.rc_buf_sz = 1000;
 1826 cfg.rc_resize_mode = 0; // Set to RESIZE_DYNAMIC for dynamic resize.
 1827 cfg.g_lag_in_frames = 0;
 1828 cfg.kf_mode = AOM_KF_AUTO;
 1829 cfg.g_w = 0; // Force user to specify width and height for raw input.
 1830 cfg.g_h = 0;
 1831
 1832 parse_command_line(argc, argv, &app_input, &svc_params, &cfg);
 1833
 1834 int ts_number_layers = svc_params.number_temporal_layers;
 1835 int ss_number_layers = svc_params.number_spatial_layers;
 1836
 1837 unsigned int width = cfg.g_w;
 1838 unsigned int height = cfg.g_h;
 1839
// When a layering mode was chosen, the layer counts must agree with the
// per-mode tables.
 1840 if (app_input.layering_mode >= 0) {
 1841 if (ts_number_layers !=
 1842 mode_to_num_temporal_layers[app_input.layering_mode] ||
 1843 ss_number_layers !=
 1844 mode_to_num_spatial_layers[app_input.layering_mode]) {
 1845 die("Number of layers doesn't match layering mode.");
 1846 }
 1847 }
 1848
// NOTE(review): input_ctx is declared with MAX_NUM_SPATIAL_LAYERS entries but
// this loop is bounded by AOM_MAX_LAYERS; if AOM_MAX_LAYERS is the larger of
// the two, the loop can index past the end of the array when every earlier
// entry is Y4M — confirm the intended bound.
 1849 bool has_non_y4m_input = false;
 1850 for (i = 0; i < AOM_MAX_LAYERS; ++i) {
 1851 if (app_input.input_ctx[i].file_type != FILE_TYPE_Y4M) {
 1852 has_non_y4m_input = true;
 1853 break;
 1854 }
 1855 }
 1856 // Y4M reader has its own allocation.
 1857 if (has_non_y4m_input) {
 1858 if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, width, height, 32)) {
 1859 die("Failed to allocate image (%dx%d)", width, height);
 1860 }
 1861 }
 1862
 1864
 1865 memcpy(&rc.layer_target_bitrate[0], &svc_params.layer_target_bitrate[0],
 1866 sizeof(svc_params.layer_target_bitrate));
 1867
// The target bitrates of the top temporal layer of every spatial layer must
// add up to cfg.rc_target_bitrate.
 1868 unsigned int total_rate = 0;
 1869 for (i = 0; i < ss_number_layers; i++) {
 1870 total_rate +=
 1871 svc_params
 1872 .layer_target_bitrate[i * ts_number_layers + ts_number_layers - 1];
 1873 }
 1874 if (total_rate != cfg.rc_target_bitrate) {
 1875 die("Incorrect total target bitrate, expected: %d", total_rate);
 1876 }
 1877
// Framerate factors are only set here for 1, 2 or 3 temporal layers.
 1878 svc_params.framerate_factor[0] = 1;
 1879 if (ts_number_layers == 2) {
 1880 svc_params.framerate_factor[0] = 2;
 1881 svc_params.framerate_factor[1] = 1;
 1882 } else if (ts_number_layers == 3) {
 1883 svc_params.framerate_factor[0] = 4;
 1884 svc_params.framerate_factor[1] = 2;
 1885 svc_params.framerate_factor[2] = 1;
 1886 }
 1887
 1888 libaom_examples::MultilayerMetadata multilayer_metadata;
 1889 if (app_input.multilayer_metadata_file != NULL) {
 1890 multilayer_metadata = libaom_examples::parse_multilayer_file(
 1891 app_input.multilayer_metadata_file);
 1892 libaom_examples::print_multilayer_metadata(multilayer_metadata);
 1893 }
 1894
// NOTE(review): if both timebase fields are integers this division truncates
// before the result is stored in the double — confirm that is intended.
 1895 framerate = cfg.g_timebase.den / cfg.g_timebase.num;
 1896 set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
 1897
 1898 AvxVideoInfo info;
 1899 info.codec_fourcc = get_fourcc_by_aom_encoder(encoder);
 1900 info.frame_width = cfg.g_w;
 1901 info.frame_height = cfg.g_h;
 1902 info.time_base.numerator = cfg.g_timebase.num;
 1903 info.time_base.denominator = cfg.g_timebase.den;
 1904 // Open an output file for each stream.
// Files are named "<output_filename>_<index>.av1", where the index is
// sl * ts_number_layers + tl; raw OBU files or IVF writers depending on
// app_input.output_obu.
 1905 for (int sl = 0; sl < ss_number_layers; ++sl) {
 1906 for (int tl = 0; tl < ts_number_layers; ++tl) {
 1907 i = sl * ts_number_layers + tl;
 1908 char file_name[PATH_MAX];
 1909 snprintf(file_name, sizeof(file_name), "%s_%d.av1",
 1910 app_input.output_filename, i);
 1911 if (app_input.output_obu) {
 1912 obu_files[i] = fopen(file_name, "wb");
 1913 if (!obu_files[i]) die("Failed to open %s for writing", file_name);
 1914 } else {
 1915 outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info);
 1916 if (!outfile[i]) die("Failed to open %s for writing", file_name);
 1917 }
 1918 }
 1919 }
// The combined stream goes to app_input.output_filename itself.
 1920 if (app_input.output_obu) {
 1921 total_layer_obu_file = fopen(app_input.output_filename, "wb");
 1922 if (!total_layer_obu_file)
 1923 die("Failed to open %s for writing", app_input.output_filename);
 1924 } else {
 1925 total_layer_file =
 1926 aom_video_writer_open(app_input.output_filename, kContainerIVF, &info);
 1927 if (!total_layer_file)
 1928 die("Failed to open %s for writing", app_input.output_filename);
 1929 }
 1930
 1931 // Initialize codec.
 1932 aom_codec_ctx_t codec;
 1933 aom_codec_flags_t flag = 0;
 1935 flag |= app_input.show_psnr ? AOM_CODEC_USE_PSNR : 0;
 1936 if (aom_codec_enc_init(&codec, encoder, &cfg, flag))
 1937 die_codec(&codec, "Failed to initialize encoder");
 1938
 1939#if CONFIG_AV1_DECODER
 1940 if (app_input.decode) {
 1941 if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0))
 1942 die_codec(&decoder, "Failed to initialize decoder");
 1943 }
 1944#endif
 1945
 1946 aom_codec_control(&codec, AOME_SET_CPUUSED, app_input.speed);
// Any non-zero aq_mode option is passed to the encoder as mode 3.
 1947 aom_codec_control(&codec, AV1E_SET_AQ_MODE, app_input.aq_mode ? 3 : 0);
 1962
 1963 // Settings to reduce key frame encoding time.
 1969
 1971
 1972 aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content);
 1973 if (app_input.tune_content == AOM_CONTENT_SCREEN) {
 1975 // INTRABC is currently disabled for rt mode, as it's too slow.
 1977 }
 1978
 1979 if (app_input.use_external_rc) {
 1981 }
 1982
 1984
 1987
 1989
 1990 svc_params.number_spatial_layers = ss_number_layers;
 1991 svc_params.number_temporal_layers = ts_number_layers;
 1992 for (i = 0; i < ss_number_layers * ts_number_layers; ++i) {
 1993 svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
 1994 svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
 1995 }
// Default scale factors: 1/1 everywhere, except that with 2 spatial layers
// layer 0 is 1/2, and with 3 spatial layers layers 0 and 1 are 1/4 and 1/2.
 1996 if (!app_input.scale_factors_explicitly_set) {
 1997 for (i = 0; i < ss_number_layers; ++i) {
 1998 svc_params.scaling_factor_num[i] = 1;
 1999 svc_params.scaling_factor_den[i] = 1;
 2000 }
 2001 if (ss_number_layers == 2) {
 2002 svc_params.scaling_factor_num[0] = 1;
 2003 svc_params.scaling_factor_den[0] = 2;
 2004 } else if (ss_number_layers == 3) {
 2005 svc_params.scaling_factor_num[0] = 1;
 2006 svc_params.scaling_factor_den[0] = 4;
 2007 svc_params.scaling_factor_num[1] = 1;
 2008 svc_params.scaling_factor_den[1] = 2;
 2009 }
 2010 }
 2011 aom_codec_control(&codec, AV1E_SET_SVC_PARAMS, &svc_params);
 2012 // TODO(aomedia:3032): Configure KSVC in fixed mode.
 2013
 2014 // This controls the maximum target size of the key frame.
 2015 // For generating smaller key frames, use a smaller max_intra_size_pct
 2016 // value, like 100 or 200.
 2017 {
 2018 const int max_intra_size_pct = 300;
 2020 max_intra_size_pct);
 2021 }
 2022
 2023 for (int lx = 0; lx < ts_number_layers * ss_number_layers; lx++) {
 2024 cx_time_layer[lx] = 0;
 2025 frame_cnt_layer[lx] = 0;
 2026 }
 2027
// rc_api stays empty unless the external rate controller was requested; the
// encode loop below tests it before every use.
 2028 std::unique_ptr<aom::AV1RateControlRTC> rc_api;
 2029 if (app_input.use_external_rc) {
 2030 const aom::AV1RateControlRtcConfig rc_cfg =
 2031 create_rtc_rc_config(cfg, app_input);
 2032 rc_api = aom::AV1RateControlRTC::Create(rc_cfg);
 2033 }
 2034
 2035 frame_avail = 1;
 2036 struct psnr_stats psnr_stream;
 2037 memset(&psnr_stream, 0, sizeof(psnr_stream));
// Main loop: keeps going while input frames remain or the previous encode
// call still produced a frame packet.
 2038 while (frame_avail || got_data) {
 2039 struct aom_usec_timer timer;
 2040 frame_avail = read_frame(&(app_input.input_ctx[0]), &raw);
 2041 // Loop over spatial layers.
 2042 for (int slx = 0; slx < ss_number_layers; slx++) {
// A spatial layer above 0 that has its own input file reads its frame from
// that file; all inputs must run out of frames at the same time.
 2043 if (slx > 0 && app_input.input_ctx[slx].filename != NULL) {
 2044 const int previous_layer_frame_avail = frame_avail;
 2045 frame_avail = read_frame(&(app_input.input_ctx[slx]), &raw);
 2046 if (previous_layer_frame_avail != frame_avail) {
 2047 die("Mismatch in number of frames between spatial layer input files");
 2048 }
 2049 }
 2050
 2051 aom_codec_iter_t iter = NULL;
 2052 const aom_codec_cx_pkt_t *pkt;
 2053 int layer = 0;
 2054 // Flag for superframe whose base is key.
// NOTE(review): the modulo assumes cfg.kf_max_dist is non-zero — confirm.
 2055 int is_key_frame = (frame_cnt % cfg.kf_max_dist) == 0;
 2056 // For flexible mode:
 2057 if (app_input.layering_mode >= 0) {
 2058 // Set the reference/update flags, layer_id, and reference_map
 2059 // buffer index.
 2060 set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id,
 2061 &ref_frame_config, &ref_frame_comp_pred,
 2062 &use_svc_control, slx, is_key_frame,
 2063 (app_input.layering_mode == 10), app_input.speed);
 2064 aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
 2065 if (use_svc_control) {
 2067 &ref_frame_config);
 2069 &ref_frame_comp_pred);
 2070 }
 2071 if (app_input.multilayer_metadata_file != NULL) {
 2072 add_multilayer_metadata(&raw, multilayer_metadata);
 2073 }
 2074 // Set the speed per layer.
 2075 if (test_speed_per_layer) {
 2076 int speed_per_layer = 10;
 2077 if (layer_id.spatial_layer_id == 0) {
 2078 if (layer_id.temporal_layer_id == 0) speed_per_layer = 6;
 2079 if (layer_id.temporal_layer_id == 1) speed_per_layer = 7;
 2080 if (layer_id.temporal_layer_id == 2) speed_per_layer = 8;
 2081 } else if (layer_id.spatial_layer_id == 1) {
 2082 if (layer_id.temporal_layer_id == 0) speed_per_layer = 7;
 2083 if (layer_id.temporal_layer_id == 1) speed_per_layer = 8;
 2084 if (layer_id.temporal_layer_id == 2) speed_per_layer = 9;
 2085 } else if (layer_id.spatial_layer_id == 2) {
 2086 if (layer_id.temporal_layer_id == 0) speed_per_layer = 8;
 2087 if (layer_id.temporal_layer_id == 1) speed_per_layer = 9;
 2088 if (layer_id.temporal_layer_id == 2) speed_per_layer = 10;
 2089 }
 2090 aom_codec_control(&codec, AOME_SET_CPUUSED, speed_per_layer);
 2091 }
 2092 } else {
 2093 // Only up to 3 temporal layers supported in fixed mode.
 2094 // Only need to set spatial and temporal layer_id: reference
 2095 // prediction, refresh, and buffer_idx are set internally.
 2096 layer_id.spatial_layer_id = slx;
 2097 layer_id.temporal_layer_id = 0;
 2098 if (ts_number_layers == 2) {
 2099 layer_id.temporal_layer_id = (frame_cnt % 2) != 0;
 2100 } else if (ts_number_layers == 3) {
 2101 if (frame_cnt % 2 != 0)
 2102 layer_id.temporal_layer_id = 2;
 2103 else if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0))
 2104 layer_id.temporal_layer_id = 1;
 2105 }
 2106 aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
 2107 }
 2108
 2109 if (set_err_resil_frame && cfg.g_error_resilient == 0) {
 2110 // Set error_resilient per frame: off/0 for base layer and
 2111 // on/1 for enhancement layer frames.
 2112 // Note that this can only be done on the fly/per-frame/layer
 2113 // if the config error_resilience is off/0. See the logic for updating
 2114 // in set_encoder_config():
 2115 // tool_cfg->error_resilient_mode =
 2116 // cfg->g_error_resilient | extra_cfg->error_resilient_mode;
 2117 const int err_resil_mode =
 2118 layer_id.spatial_layer_id > 0 || layer_id.temporal_layer_id > 0;
 2120 err_resil_mode);
 2121 }
 2122
// Flat index of the current (spatial, temporal) layer, used for the per-layer
// statistics arrays.
 2123 layer = slx * ts_number_layers + layer_id.temporal_layer_id;
 2124 if (frame_avail && slx == 0) ++rc.layer_input_frames[layer];
 2125
 2126 if (test_dynamic_scaling_single_layer) {
 2127 // Example to scale source down by 2x2, then 4x4, and then back up to
 2128 // 2x2, and then back to original.
 2129 int frame_2x2 = 200;
 2130 int frame_4x4 = 400;
 2131 int frame_2x2up = 600;
 2132 int frame_orig = 800;
 2133 if (frame_cnt >= frame_2x2 && frame_cnt < frame_4x4) {
 2134 // Scale source down by 2x2.
 2135 struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
 2136 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
 2137 } else if (frame_cnt >= frame_4x4 && frame_cnt < frame_2x2up) {
 2138 // Scale source down by 4x4.
 2139 struct aom_scaling_mode mode = { AOME_ONEFOUR, AOME_ONEFOUR };
 2140 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
 2141 } else if (frame_cnt >= frame_2x2up && frame_cnt < frame_orig) {
 2142 // Source back up to 2x2.
 2143 struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
 2144 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
 2145 } else if (frame_cnt >= frame_orig) {
 2146 // Source back up to original resolution (no scaling).
 2147 struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
 2148 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
 2149 }
 2150 if (frame_cnt == frame_2x2 || frame_cnt == frame_4x4 ||
 2151 frame_cnt == frame_2x2up || frame_cnt == frame_orig) {
 2152 // For dynamic resize testing on single layer: refresh all references
 2153 // on the resized frame: this is to avoid decode error:
 2154 // if resize goes down by >= 4x4 then libaom decoder will throw an
 2155 // error that some reference (even though not used) is beyond the
 2156 // limit size (must be smaller than 4x4).
 2157 for (i = 0; i < REF_FRAMES; i++) ref_frame_config.refresh[i] = 1;
 2158 if (use_svc_control) {
 2160 &ref_frame_config);
 2162 &ref_frame_comp_pred);
 2163 }
 2164 }
 2165 }
 2166
 2167 // Change target_bitrate every other frame.
 2168 if (test_changing_bitrate && frame_cnt % 2 == 0) {
 2169 if (frame_cnt < 500)
 2170 cfg.rc_target_bitrate += 10;
 2171 else
 2172 cfg.rc_target_bitrate -= 10;
 2173 // Do big increase and decrease.
 2174 if (frame_cnt == 100) cfg.rc_target_bitrate <<= 1;
 2175 if (frame_cnt == 600) cfg.rc_target_bitrate >>= 1;
 2176 if (cfg.rc_target_bitrate < 100) cfg.rc_target_bitrate = 100;
 2177 // Call change_config, or bypass with new control.
 2178 // res = aom_codec_enc_config_set(&codec, &cfg);
 2180 cfg.rc_target_bitrate))
 2181 die_codec(&codec, "Failed to SET_BITRATE_ONE_PASS_CBR");
 2182 }
 2183
// With the external rate controller: compute the QP for this frame and convert
// the returned qindex to the quantizer scale before passing it on.
 2184 if (rc_api) {
 2185 aom::AV1FrameParamsRTC frame_params;
 2186 // TODO(jianj): Add support for SVC.
 2187 frame_params.spatial_layer_id = 0;
 2188 frame_params.temporal_layer_id = 0;
 2189 frame_params.frame_type =
 2190 is_key_frame ? aom::kKeyFrame : aom::kInterFrame;
 2191 rc_api->ComputeQP(frame_params);
 2192 const int current_qp = rc_api->GetQP();
 2194 qindex_to_quantizer(current_qp))) {
 2195 die_codec(&codec, "Failed to SET_QUANTIZER_ONE_PASS");
 2196 }
 2197 }
 2198
 2199 if (test_active_maps) set_active_map(&cfg, &codec, frame_cnt);
 2200
 2201 // Do the layer encode.
// A NULL image is passed once the input has run out of frames.
 2202 aom_usec_timer_start(&timer);
 2203 if (aom_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags))
 2204 die_codec(&codec, "Failed to encode frame");
 2205 aom_usec_timer_mark(&timer);
 2206 cx_time += aom_usec_timer_elapsed(&timer);
 2207 cx_time_layer[layer] += aom_usec_timer_elapsed(&timer);
 2208 frame_cnt_layer[layer] += 1;
 2209
 2210 // Get the high motion content flag.
 2211 int content_flag = 0;
 2213 &content_flag)) {
 2214 die_codec(&codec, "Failed to GET_HIGH_MOTION_CONTENT_SCREEN_RTC");
 2215 }
 2216
 2217 got_data = 0;
 2218 // For simulcast (mode 11): write out each spatial layer to the file.
 2219 int ss_layers_write = (app_input.layering_mode == 11)
 2220 ? layer_id.spatial_layer_id + 1
 2221 : ss_number_layers;
 2222 while ((pkt = aom_codec_get_cx_data(&codec, &iter))) {
 2223 switch (pkt->kind) {
// The packet is written to the file of its own layer and of every higher
// spatial/temporal layer up to the limits above; the encoding bitrate is only
// accumulated for the packet's own spatial layer.
 2225 for (int sl = layer_id.spatial_layer_id; sl < ss_layers_write;
 2226 ++sl) {
 2227 for (int tl = layer_id.temporal_layer_id; tl < ts_number_layers;
 2228 ++tl) {
 2229 int j = sl * ts_number_layers + tl;
 2230 if (app_input.output_obu) {
 2231 fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
 2232 obu_files[j]);
 2233 } else {
 2234 aom_video_writer_write_frame(
 2235 outfile[j],
 2236 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
 2237 pkt->data.frame.sz, pts);
 2238 }
 2239 if (sl == layer_id.spatial_layer_id)
 2240 rc.layer_encoding_bitrate[j] += 8.0 * pkt->data.frame.sz;
 2241 }
 2242 }
 2243 got_data = 1;
 2244 // Write everything into the top layer.
 2245 if (app_input.output_obu) {
 2246 fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
 2247 total_layer_obu_file);
 2248 } else {
 2249 aom_video_writer_write_frame(
 2250 total_layer_file,
 2251 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
 2252 pkt->data.frame.sz, pts);
 2253 }
 2254 // Keep count of rate control stats per layer (for non-key).
 2255 if (!(pkt->data.frame.flags & AOM_FRAME_IS_KEY)) {
 2256 int j = layer_id.spatial_layer_id * ts_number_layers +
 2257 layer_id.temporal_layer_id;
 2258 assert(j >= 0);
 2259 rc.layer_avg_frame_size[j] += 8.0 * pkt->data.frame.sz;
 2260 rc.layer_avg_rate_mismatch[j] +=
 2261 fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[j]) /
 2262 rc.layer_pfb[j];
 2263 if (slx == 0) ++rc.layer_enc_frames[layer_id.temporal_layer_id];
 2264 }
 2265
 2266 if (rc_api) {
 2267 rc_api->PostEncodeUpdate(pkt->data.frame.sz);
 2268 }
 2269 // Update for short-time encoding bitrate states, for moving window
 2270 // of size rc->window, shifted by rc->window / 2.
 2271 // Ignore first window segment, due to key frame.
 2272 // For spatial layers: only do this for top/highest SL.
 2273 if (frame_cnt > rc.window_size && slx == ss_number_layers - 1) {
 2274 sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
 2275 rc.window_size = (rc.window_size <= 0) ? 1 : rc.window_size;
 2276 if (frame_cnt % rc.window_size == 0) {
 2277 rc.window_count += 1;
 2278 rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
 2279 rc.variance_st_encoding_bitrate +=
 2280 (sum_bitrate / rc.window_size) *
 2281 (sum_bitrate / rc.window_size);
 2282 sum_bitrate = 0.0;
 2283 }
 2284 }
 2285 // Second shifted window.
 2286 if (frame_cnt > rc.window_size + rc.window_size / 2 &&
 2287 slx == ss_number_layers - 1) {
 2288 sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
 2289 if (frame_cnt > 2 * rc.window_size &&
 2290 frame_cnt % rc.window_size == 0) {
 2291 rc.window_count += 1;
 2292 rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
 2293 rc.variance_st_encoding_bitrate +=
 2294 (sum_bitrate2 / rc.window_size) *
 2295 (sum_bitrate2 / rc.window_size);
 2296 sum_bitrate2 = 0.0;
 2297 }
 2298 }
 2299
 2300#if CONFIG_AV1_DECODER
 2301 if (app_input.decode) {
 2302 if (aom_codec_decode(
 2303 &decoder,
 2304 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
 2305 pkt->data.frame.sz, NULL))
 2306 die_codec(&decoder, "Failed to decode frame");
 2307 }
 2308#endif
 2309
 2310 break;
 2311 case AOM_CODEC_PSNR_PKT:
// Only slot 0 of the PSNR accumulators is updated here.
 2312 if (app_input.show_psnr) {
 2313 psnr_stream.psnr_sse_total[0] += pkt->data.psnr.sse[0];
 2314 psnr_stream.psnr_samples_total[0] += pkt->data.psnr.samples[0];
 2315 for (int plane = 0; plane < 4; plane++) {
 2316 psnr_stream.psnr_totals[0][plane] += pkt->data.psnr.psnr[plane];
 2317 }
 2318 psnr_stream.psnr_count[0]++;
 2319 }
 2320 break;
 2321 default: break;
 2322 }
 2323 }
 2324#if CONFIG_AV1_DECODER
 2325 if (got_data && app_input.decode) {
 2326 // Don't look for mismatch on top spatial and top temporal layers as
 2327 // they are non reference frames.
 2328 if ((ss_number_layers > 1 || ts_number_layers > 1) &&
 2329 !(layer_id.temporal_layer_id > 0 &&
 2330 layer_id.temporal_layer_id == ts_number_layers - 1)) {
 2331 if (test_decode(&codec, &decoder, frame_cnt)) {
 2332#if CONFIG_INTERNAL_STATS
 2333 fprintf(stats_file, "First mismatch occurred in frame %d\n",
 2334 frame_cnt);
 2335 fclose(stats_file);
 2336#endif
 2337 fatal("Mismatch seen");
 2338 }
 2339 }
 2340 }
 2341#endif
 2342 } // loop over spatial layers
 2343 ++frame_cnt;
 2344 pts += frame_duration;
 2345 }
 2346
// Close the input files; stops at the first context without a filename.
 2347 for (i = 0; i < MAX_NUM_SPATIAL_LAYERS; ++i) {
 2348 if (app_input.input_ctx[i].filename == NULL) {
 2349 break;
 2350 }
 2351 close_input_file(&(app_input.input_ctx[i]));
 2352 }
 2353 printout_rate_control_summary(&rc, frame_cnt, ss_number_layers,
 2354 ts_number_layers);
 2355
 2356 printf("\n");
 2357 for (int slx = 0; slx < ss_number_layers; slx++)
 2358 for (int tlx = 0; tlx < ts_number_layers; tlx++) {
 2359 int lx = slx * ts_number_layers + tlx;
 2360 printf("Per layer encoding time/FPS stats for encoder: %d %d %d %f %f \n",
 2361 slx, tlx, frame_cnt_layer[lx],
 2362 (float)cx_time_layer[lx] / (double)(frame_cnt_layer[lx] * 1000),
 2363 1000000 * (double)frame_cnt_layer[lx] / (double)cx_time_layer[lx]);
 2364 }
 2365
 2366 printf("\n");
 2367 printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
 2368 frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
 2369 1000000 * (double)frame_cnt / (double)cx_time);
 2370
 2371 if (app_input.show_psnr) {
 2372 show_psnr(&psnr_stream, 255.0);
 2373 }
 2374
 2375 if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy encoder");
 2376
 2377#if CONFIG_AV1_DECODER
 2378 if (app_input.decode) {
 2379 if (aom_codec_destroy(&decoder))
 2380 die_codec(&decoder, "Failed to destroy decoder");
 2381 }
 2382#endif
 2383
 2384#if CONFIG_INTERNAL_STATS
 2385 fprintf(stats_file, "No mismatch detected in recon buffers\n");
 2386 fclose(stats_file);
 2387#endif
 2388
 2389 // Try to rewrite the output file headers with the actual frame count.
// NOTE(review): only the video writers are closed here; no fclose() of
// obu_files / total_layer_obu_file is visible in this listing — confirm.
 2390 for (i = 0; i < ss_number_layers * ts_number_layers; ++i)
 2391 aom_video_writer_close(outfile[i]);
 2392 aom_video_writer_close(total_layer_file);
 2393
// raw was only allocated here when a non-Y4M input was detected above.
 2394 if (has_non_y4m_input) {
 2395 aom_img_free(&raw);
 2396 }
 2397 return EXIT_SUCCESS;
 2398}
Describes the decoder algorithm interface to applications.
Describes the encoder algorithm interface to applications.
Describes the aom image descriptor and associated operations.
@ AOM_MIF_KEY_FRAME
Definition aom_image.h:166
@ AOM_CSP_UNKNOWN
Definition aom_image.h:143
enum aom_chroma_sample_position aom_chroma_sample_position_t
List of chroma sample positions.
#define AOM_IMG_FMT_HIGHBITDEPTH
Definition aom_image.h:38
aom_image_t * aom_img_alloc(aom_image_t *img, aom_img_fmt_t fmt, unsigned int d_w, unsigned int d_h, unsigned int align)
Open a descriptor, allocating storage for the underlying image.
@ AOM_IMG_FMT_I420
Definition aom_image.h:45
enum aom_img_fmt aom_img_fmt_t
List of supported image formats.
int aom_img_add_metadata(aom_image_t *img, uint32_t type, const uint8_t *data, size_t sz, aom_metadata_insert_flags_t insert_flag)
Add metadata to image.
void aom_img_free(aom_image_t *img)
Close an image descriptor.
Provides definitions for using AOM or AV1 encoder algorithm within the aom Codec Interface.
#define AOM_MAX_LAYERS
Definition aomcx.h:1718
#define AOM_MAX_TS_LAYERS
Definition aomcx.h:1720
aom_codec_iface_t * aom_codec_av1_cx(void)
The interface to the AV1 encoder.
@ AOM_FULL_SUPERFRAME_DROP
Definition aomcx.h:1780
@ AV1E_SET_BITRATE_ONE_PASS_CBR
Codec control to set the target bitrate in kilobits per second, unsigned int parameter....
Definition aomcx.h:1535
@ AV1E_SET_ENABLE_SMOOTH_INTRA
Codec control function to turn on / off smooth intra modes usage, int parameter.
Definition aomcx.h:1076
@ AV1E_SET_ENABLE_TPL_MODEL
Codec control function to enable RDO modulated by frame temporal dependency, unsigned int parameter.
Definition aomcx.h:414
@ AV1E_SET_AQ_MODE
Codec control function to set adaptive quantization mode, unsigned int parameter.
Definition aomcx.h:474
@ AV1E_SET_SVC_LAYER_ID
Codec control function to set the layer id, aom_svc_layer_id_t* parameter.
Definition aomcx.h:1284
@ AV1E_SET_SVC_REF_FRAME_CONFIG
Codec control function to set the reference frame config, aom_svc_ref_frame_config_t* parameter.
Definition aomcx.h:1294
@ AV1E_SET_TUNE_CONTENT
Codec control function to set content type, aom_tune_content parameter.
Definition aomcx.h:503
@ AV1E_SET_CDF_UPDATE_MODE
Codec control function to set CDF update mode, unsigned int parameter.
Definition aomcx.h:512
@ AV1E_SET_ENABLE_ANGLE_DELTA
Codec control function to turn on/off intra angle delta, int parameter.
Definition aomcx.h:1123
@ AV1E_SET_MV_COST_UPD_FREQ
Control to set frequency of the cost updates for motion vectors, unsigned int parameter.
Definition aomcx.h:1262
@ AV1E_SET_INTRA_DEFAULT_TX_ONLY
Control to use default tx type only for intra modes, int parameter.
Definition aomcx.h:1211
@ AV1E_SET_SVC_REF_FRAME_COMP_PRED
Codec control function to set reference frame compound prediction. aom_svc_ref_frame_comp_pred_t* par...
Definition aomcx.h:1399
@ AV1E_SET_ENABLE_INTRABC
Codec control function to turn on/off intra block copy mode, int parameter.
Definition aomcx.h:1119
@ AV1E_SET_ENABLE_WARPED_MOTION
Codec control function to turn on / off warped motion usage at sequence level, int parameter.
Definition aomcx.h:1044
@ AV1E_SET_RTC_EXTERNAL_RC
Codec control function to set flag for rate control used by external encoders.
Definition aomcx.h:1434
@ AV1E_SET_COEFF_COST_UPD_FREQ
Control to set frequency of the cost updates for coefficients, unsigned int parameter.
Definition aomcx.h:1242
@ AV1E_SET_ENABLE_CDEF
Codec control function to encode with CDEF, unsigned int parameter.
Definition aomcx.h:676
@ AOME_SET_ACTIVEMAP
Codec control function to pass an Active map to encoder, aom_active_map_t* parameter.
Definition aomcx.h:190
@ AV1E_SET_DV_COST_UPD_FREQ
Control to set frequency of the cost updates for intrabc motion vectors, unsigned int parameter.
Definition aomcx.h:1365
@ AV1E_SET_SVC_FRAME_DROP_MODE
Codec control to set the frame drop mode for SVC, unsigned int parameter. The valid values are consta...
Definition aomcx.h:1548
@ AV1E_SET_SVC_PARAMS
Codec control function to set SVC parameters, aom_svc_params_t* parameter.
Definition aomcx.h:1289
@ AV1E_SET_ENABLE_FILTER_INTRA
Codec control function to turn on / off filter intra usage at sequence level, int parameter.
Definition aomcx.h:1065
@ AV1E_SET_ENABLE_PALETTE
Codec control function to turn on/off palette mode, int parameter.
Definition aomcx.h:1115
@ AV1E_SET_ENABLE_CFL_INTRA
Codec control function to turn on / off CFL uv intra mode usage, int parameter.
Definition aomcx.h:1094
@ AOME_SET_MAX_INTRA_BITRATE_PCT
Codec control function to set max data rate for intra frames, unsigned int parameter.
Definition aomcx.h:312
@ AV1E_SET_ERROR_RESILIENT_MODE
Codec control function to enable error_resilient_mode, int parameter.
Definition aomcx.h:448
@ AV1E_SET_ENABLE_OBMC
Codec control function to predict with OBMC mode, unsigned int parameter.
Definition aomcx.h:703
@ AV1E_SET_AUTO_TILES
Codec control to set auto tiling, unsigned int parameter. Value of 1 means encoder will set number of...
Definition aomcx.h:1556
@ AV1E_SET_LOOPFILTER_CONTROL
Codec control to control loop filter.
Definition aomcx.h:1414
@ AOME_SET_SCALEMODE
Codec control function to set encoder scaling mode for the next frame to be coded,...
Definition aomcx.h:197
@ AV1E_SET_ENABLE_ORDER_HINT
Codec control function to turn on / off frame order hint (int parameter). Affects: joint compound mod...
Definition aomcx.h:871
@ AV1E_SET_DELTAQ_MODE
Codec control function to set the delta q mode, unsigned int parameter.
Definition aomcx.h:1139
@ AV1E_SET_POSTENCODE_DROP_RTC
Codec control to enable post encode frame drop for RTC encoding, int parameter.
Definition aomcx.h:1572
@ AV1E_SET_ENABLE_GLOBAL_MOTION
Codec control function to turn on / off global motion usage for a sequence, int parameter.
Definition aomcx.h:1034
@ AOME_SET_CPUUSED
Codec control function to set encoder internal speed settings, int parameter.
Definition aomcx.h:220
@ AV1E_GET_HIGH_MOTION_CONTENT_SCREEN_RTC
Codec control to get the high motion content flag, used for screen content realtime (RTC) encoding,...
Definition aomcx.h:1563
@ AV1E_SET_GF_CBR_BOOST_PCT
Boost percentage for Golden Frame in CBR mode, unsigned int parameter.
Definition aomcx.h:345
@ AV1E_SET_QUANTIZER_ONE_PASS
Codec control to set quantizer for the next frame, int parameter.
Definition aomcx.h:1497
@ AV1E_SET_MODE_COST_UPD_FREQ
Control to set frequency of the cost updates for mode, unsigned int parameter.
Definition aomcx.h:1252
@ AV1E_SET_MAX_CONSEC_FRAME_DROP_MS_CBR
Codec control to set the maximum number of consecutive frame drops, in units of time (milliseconds),...
Definition aomcx.h:1578
@ AV1_GET_NEW_FRAME_IMAGE
Codec control function to get a pointer to the new frame.
Definition aom.h:70
const char * aom_codec_iface_name(aom_codec_iface_t *iface)
Return the name for a given interface.
enum aom_bit_depth aom_bit_depth_t
Bit depth for codec. This enumeration determines the bit depth of the codec.
aom_codec_err_t aom_codec_control(aom_codec_ctx_t *ctx, int ctrl_id,...)
Algorithm Control.
long aom_codec_flags_t
Initialization-time Feature Enabling.
Definition aom_codec.h:232
const struct aom_codec_iface aom_codec_iface_t
Codec interface structure.
Definition aom_codec.h:271
aom_codec_err_t aom_codec_destroy(aom_codec_ctx_t *ctx)
Destroy a codec instance.
const char * aom_codec_err_to_string(aom_codec_err_t err)
Convert error number to printable string.
aom_codec_err_t
Algorithm return codes.
Definition aom_codec.h:155
#define AOM_CODEC_CONTROL_TYPECHECKED(ctx, id, data)
aom_codec_control wrapper macro (adds type-checking, less flexible)
Definition aom_codec.h:542
const void * aom_codec_iter_t
Iterator.
Definition aom_codec.h:305
#define AOM_FRAME_IS_KEY
Definition aom_codec.h:288
@ AOM_BITS_8
Definition aom_codec.h:336
@ AOM_BITS_10
Definition aom_codec.h:337
@ AOM_CODEC_INVALID_PARAM
An application-supplied parameter is not valid.
Definition aom_codec.h:200
@ AOM_CODEC_MEM_ERROR
Memory operation failed.
Definition aom_codec.h:163
@ AOM_CODEC_OK
Operation completed without error.
Definition aom_codec.h:157
aom_codec_err_t aom_codec_decode(aom_codec_ctx_t *ctx, const uint8_t *data, size_t data_sz, void *user_priv)
Decode data.
#define aom_codec_dec_init(ctx, iface, cfg, flags)
Convenience macro for aom_codec_dec_init_ver()
Definition aom_decoder.h:129
const aom_codec_cx_pkt_t * aom_codec_get_cx_data(aom_codec_ctx_t *ctx, aom_codec_iter_t *iter)
Encoded data iterator.
aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img, aom_codec_pts_t pts, unsigned long duration, aom_enc_frame_flags_t flags)
Encode a frame.
#define aom_codec_enc_init(ctx, iface, cfg, flags)
Convenience macro for aom_codec_enc_init_ver()
Definition aom_encoder.h:943
aom_codec_err_t aom_codec_enc_config_default(aom_codec_iface_t *iface, aom_codec_enc_cfg_t *cfg, unsigned int usage)
Get the default configuration for a usage.
#define AOM_USAGE_REALTIME
usage parameter analogous to AV1 REALTIME mode.
Definition aom_encoder.h:1016
#define AOM_CODEC_USE_HIGHBITDEPTH
Definition aom_encoder.h:80
#define AOM_CODEC_USE_PSNR
Initialization-time Feature Enabling.
Definition aom_encoder.h:79
@ AOM_CBR
Definition aom_encoder.h:187
@ AOM_KF_AUTO
Definition aom_encoder.h:202
@ AOM_CODEC_PSNR_PKT
Definition aom_encoder.h:113
@ AOM_CODEC_CX_FRAME_PKT
Definition aom_encoder.h:110
aom active region map
Definition aomcx.h:1634
unsigned int rows
Definition aomcx.h:1637
unsigned int cols
Definition aomcx.h:1638
unsigned char * active_map
Specifies on (1) or off (0) for each 16x16 region within a frame.
Definition aomcx.h:1636
Codec context structure.
Definition aom_codec.h:315
Encoder output packet.
Definition aom_encoder.h:122
size_t sz
Definition aom_encoder.h:127
enum aom_codec_cx_pkt_kind kind
Definition aom_encoder.h:123
double psnr[4]
Definition aom_encoder.h:145
union aom_codec_cx_pkt::@1 data
struct aom_codec_cx_pkt::@1::@2 frame
aom_codec_frame_flags_t flags
Definition aom_encoder.h:132
void * buf
Definition aom_encoder.h:126
Encoder configuration structure.
Definition aom_encoder.h:387
unsigned int g_input_bit_depth
Bit-depth of the input frames.
Definition aom_encoder.h:475
unsigned int rc_dropframe_thresh
Temporal resampling configuration, if supported by the codec.
Definition aom_encoder.h:540
struct aom_rational g_timebase
Stream timebase units.
Definition aom_encoder.h:489
unsigned int g_usage
Algorithm specific "usage" value.
Definition aom_encoder.h:399
unsigned int rc_buf_sz
Decoder Buffer Size.
Definition aom_encoder.h:705
unsigned int g_h
Height of the frame.
Definition aom_encoder.h:435
enum aom_kf_mode kf_mode
Keyframe placement mode.
Definition aom_encoder.h:768
enum aom_rc_mode rc_end_usage
Rate control algorithm to use.
Definition aom_encoder.h:623
unsigned int g_threads
Maximum number of threads to use.
Definition aom_encoder.h:407
unsigned int kf_min_dist
Keyframe minimum interval.
Definition aom_encoder.h:777
unsigned int g_lag_in_frames
Allow lagged encoding.
Definition aom_encoder.h:518
unsigned int rc_buf_initial_sz
Decoder Buffer Initial Size.
Definition aom_encoder.h:714
unsigned int g_profile
Bitstream profile to use.
Definition aom_encoder.h:417
aom_bit_depth_t g_bit_depth
Bit-depth of the codec.
Definition aom_encoder.h:467
unsigned int g_w
Width of the frame.
Definition aom_encoder.h:426
unsigned int rc_undershoot_pct
Rate control adaptation undershoot control.
Definition aom_encoder.h:681
unsigned int kf_max_dist
Keyframe maximum interval.
Definition aom_encoder.h:786
aom_codec_er_flags_t g_error_resilient
Enable error resilient modes.
Definition aom_encoder.h:497
unsigned int rc_max_quantizer
Maximum (Worst Quality) Quantizer.
Definition aom_encoder.h:668
unsigned int rc_buf_optimal_sz
Decoder Buffer Optimal Size.
Definition aom_encoder.h:723
unsigned int rc_min_quantizer
Minimum (Best Quality) Quantizer.
Definition aom_encoder.h:658
unsigned int rc_target_bitrate
Target data rate.
Definition aom_encoder.h:644
unsigned int rc_resize_mode
Mode for spatial resampling, if supported by the codec.
Definition aom_encoder.h:549
unsigned int rc_overshoot_pct
Rate control adaptation overshoot control.
Definition aom_encoder.h:690
Image Descriptor.
Definition aom_image.h:182
aom_img_fmt_t fmt
Definition aom_image.h:183
unsigned int d_w
Definition aom_image.h:197
unsigned int d_h
Definition aom_image.h:198
int num
Definition aom_encoder.h:165
int den
Definition aom_encoder.h:166
aom image scaling mode
Definition aomcx.h:1646
Definition aomcx.h:1723
int temporal_layer_id
Definition aomcx.h:1725
int spatial_layer_id
Definition aomcx.h:1724
Definition aomcx.h:1734
int max_quantizers[32]
Definition aomcx.h:1737
int number_spatial_layers
Definition aomcx.h:1735
int layer_target_bitrate[32]
Definition aomcx.h:1742
int framerate_factor[8]
Definition aomcx.h:1744
int min_quantizers[32]
Definition aomcx.h:1738
int scaling_factor_den[4]
Definition aomcx.h:1740
int number_temporal_layers
Definition aomcx.h:1736
int scaling_factor_num[4]
Definition aomcx.h:1739
Definition aomcx.h:1771
int use_comp_pred[3]
Definition aomcx.h:1774
Definition aomcx.h:1748
int reference[7]
Definition aomcx.h:1764
int refresh[8]
Definition aomcx.h:1767
int ref_idx[7]
Definition aomcx.h:1766